1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/AliasAnalysis.h"
33 #include "llvm/Analysis/MemoryLocation.h"
34 #include "llvm/CodeGen/DAGCombine.h"
35 #include "llvm/CodeGen/ISDOpcodes.h"
36 #include "llvm/CodeGen/MachineFrameInfo.h"
37 #include "llvm/CodeGen/MachineFunction.h"
38 #include "llvm/CodeGen/MachineMemOperand.h"
39 #include "llvm/CodeGen/RuntimeLibcalls.h"
40 #include "llvm/CodeGen/SelectionDAG.h"
41 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
42 #include "llvm/CodeGen/SelectionDAGNodes.h"
43 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
44 #include "llvm/CodeGen/TargetLowering.h"
45 #include "llvm/CodeGen/TargetRegisterInfo.h"
46 #include "llvm/CodeGen/TargetSubtargetInfo.h"
47 #include "llvm/CodeGen/ValueTypes.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
62 #include "llvm/Support/MachineValueType.h"
63 #include "llvm/Support/MathExtras.h"
64 #include "llvm/Support/raw_ostream.h"
65 #include "llvm/Target/TargetMachine.h"
66 #include "llvm/Target/TargetOptions.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined   , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94         cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99                    cl::desc("Only use DAG-combiner alias analysis in this"
100                             " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107                   cl::desc("Bypass the profitability model of load slicing"),
108                   cl::init(false));
109 
110 static cl::opt<bool>
111   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112                     cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116   class DAGCombiner {
117     SelectionDAG &DAG;
118     const TargetLowering &TLI;
119     CombineLevel Level;
120     CodeGenOpt::Level OptLevel;
121     bool LegalOperations = false;
122     bool LegalTypes = false;
123     bool ForCodeSize;
124 
125     /// Worklist of all of the nodes that need to be simplified.
126     ///
127     /// This must behave as a stack -- new nodes to process are pushed onto the
128     /// back and when processing we pop off of the back.
129     ///
130     /// The worklist will not contain duplicates but may contain null entries
131     /// due to nodes being deleted from the underlying DAG.
132     SmallVector<SDNode *, 64> Worklist;
133 
134     /// Mapping from an SDNode to its position on the worklist.
135     ///
136     /// This is used to find and remove nodes from the worklist (by nulling
137     /// them) when they are deleted from the underlying DAG. It relies on
138     /// stable indices of nodes within the worklist.
139     DenseMap<SDNode *, unsigned> WorklistMap;
140 
141     /// Set of nodes which have been combined (at least once).
142     ///
143     /// This is used to allow us to reliably add any operands of a DAG node
144     /// which have not yet been combined to the worklist.
145     SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147     // AA - Used for DAG load/store alias analysis.
148     AliasAnalysis *AA;
149 
150     /// When an instruction is simplified, add all users of the instruction to
151     /// the work lists because they might get more simplified now.
152     void AddUsersToWorklist(SDNode *N) {
153       for (SDNode *Node : N->uses())
154         AddToWorklist(Node);
155     }
156 
157     /// Call the node-specific routine that folds each particular type of node.
158     SDValue visit(SDNode *N);
159 
160   public:
161     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163           OptLevel(OL), AA(AA) {
164       ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166       MaximumLegalStoreInBits = 0;
167       for (MVT VT : MVT::all_valuetypes())
168         if (EVT(VT).isSimple() && VT != MVT::Other &&
169             TLI.isTypeLegal(EVT(VT)) &&
170             VT.getSizeInBits() >= MaximumLegalStoreInBits)
171           MaximumLegalStoreInBits = VT.getSizeInBits();
172     }
173 
174     /// Add to the worklist making sure its instance is at the back (next to be
175     /// processed.)
176     void AddToWorklist(SDNode *N) {
177       assert(N->getOpcode() != ISD::DELETED_NODE &&
178              "Deleted Node added to Worklist");
179 
180       // Skip handle nodes as they can't usefully be combined and confuse the
181       // zero-use deletion strategy.
182       if (N->getOpcode() == ISD::HANDLENODE)
183         return;
184 
185       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186         Worklist.push_back(N);
187     }
188 
189     /// Remove all instances of N from the worklist.
190     void removeFromWorklist(SDNode *N) {
191       CombinedNodes.erase(N);
192 
193       auto It = WorklistMap.find(N);
194       if (It == WorklistMap.end())
195         return; // Not in the worklist.
196 
197       // Null out the entry rather than erasing it to avoid a linear operation.
198       Worklist[It->second] = nullptr;
199       WorklistMap.erase(It);
200     }
201 
202     void deleteAndRecombine(SDNode *N);
203     bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205     /// Replaces all uses of the results of one DAG node with new values.
206     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207                       bool AddTo = true);
208 
209     /// Replaces all uses of the results of one DAG node with new values.
210     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211       return CombineTo(N, &Res, 1, AddTo);
212     }
213 
214     /// Replaces all uses of the results of one DAG node with new values.
215     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216                       bool AddTo = true) {
217       SDValue To[] = { Res0, Res1 };
218       return CombineTo(N, To, 2, AddTo);
219     }
220 
221     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223   private:
224     unsigned MaximumLegalStoreInBits;
225 
226     /// Check the specified integer node value to see if it can be simplified or
227     /// if things it uses can be simplified by bit propagation.
228     /// If so, return true.
229     bool SimplifyDemandedBits(SDValue Op) {
230       unsigned BitWidth = Op.getScalarValueSizeInBits();
231       APInt Demanded = APInt::getAllOnesValue(BitWidth);
232       return SimplifyDemandedBits(Op, Demanded);
233     }
234 
235     /// Check the specified vector node value to see if it can be simplified or
236     /// if things it uses can be simplified as it only uses some of the
237     /// elements. If so, return true.
238     bool SimplifyDemandedVectorElts(SDValue Op) {
239       unsigned NumElts = Op.getValueType().getVectorNumElements();
240       APInt Demanded = APInt::getAllOnesValue(NumElts);
241       return SimplifyDemandedVectorElts(Op, Demanded);
242     }
243 
244     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
246                                     bool AssumeSingleUse = false);
247 
248     bool CombineToPreIndexedLoadStore(SDNode *N);
249     bool CombineToPostIndexedLoadStore(SDNode *N);
250     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
251     bool SliceUpLoad(SDNode *N);
252 
253     // Scalars have size 0 to distinguish from singleton vectors.
254     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
255     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
256     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
257 
258     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
259     ///   load.
260     ///
261     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
262     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
263     /// \param EltNo index of the vector element to load.
264     /// \param OriginalLoad load that EVE came from to be replaced.
265     /// \returns EVE on success SDValue() on failure.
266     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
267                                          SDValue EltNo,
268                                          LoadSDNode *OriginalLoad);
269     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
270     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
271     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
272     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
273     SDValue PromoteIntBinOp(SDValue Op);
274     SDValue PromoteIntShiftOp(SDValue Op);
275     SDValue PromoteExtend(SDValue Op);
276     bool PromoteLoad(SDValue Op);
277 
278     /// Call the node-specific routine that knows how to fold each
279     /// particular type of node. If that doesn't do anything, try the
280     /// target-specific DAG combines.
281     SDValue combine(SDNode *N);
282 
283     // Visitation implementation - Implement dag node combining for different
284     // node types.  The semantics are as follows:
285     // Return Value:
286     //   SDValue.getNode() == 0 - No change was made
287     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
288     //   otherwise              - N should be replaced by the returned Operand.
289     //
290     SDValue visitTokenFactor(SDNode *N);
291     SDValue visitMERGE_VALUES(SDNode *N);
292     SDValue visitADD(SDNode *N);
293     SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
294     SDValue visitSUB(SDNode *N);
295     SDValue visitADDSAT(SDNode *N);
296     SDValue visitSUBSAT(SDNode *N);
297     SDValue visitADDC(SDNode *N);
298     SDValue visitADDO(SDNode *N);
299     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
300     SDValue visitSUBC(SDNode *N);
301     SDValue visitSUBO(SDNode *N);
302     SDValue visitADDE(SDNode *N);
303     SDValue visitADDCARRY(SDNode *N);
304     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
305     SDValue visitSUBE(SDNode *N);
306     SDValue visitSUBCARRY(SDNode *N);
307     SDValue visitMUL(SDNode *N);
308     SDValue useDivRem(SDNode *N);
309     SDValue visitSDIV(SDNode *N);
310     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
311     SDValue visitUDIV(SDNode *N);
312     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
313     SDValue visitREM(SDNode *N);
314     SDValue visitMULHU(SDNode *N);
315     SDValue visitMULHS(SDNode *N);
316     SDValue visitSMUL_LOHI(SDNode *N);
317     SDValue visitUMUL_LOHI(SDNode *N);
318     SDValue visitMULO(SDNode *N);
319     SDValue visitIMINMAX(SDNode *N);
320     SDValue visitAND(SDNode *N);
321     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
322     SDValue visitOR(SDNode *N);
323     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
324     SDValue visitXOR(SDNode *N);
325     SDValue SimplifyVBinOp(SDNode *N);
326     SDValue visitSHL(SDNode *N);
327     SDValue visitSRA(SDNode *N);
328     SDValue visitSRL(SDNode *N);
329     SDValue visitFunnelShift(SDNode *N);
330     SDValue visitRotate(SDNode *N);
331     SDValue visitABS(SDNode *N);
332     SDValue visitBSWAP(SDNode *N);
333     SDValue visitBITREVERSE(SDNode *N);
334     SDValue visitCTLZ(SDNode *N);
335     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
336     SDValue visitCTTZ(SDNode *N);
337     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
338     SDValue visitCTPOP(SDNode *N);
339     SDValue visitSELECT(SDNode *N);
340     SDValue visitVSELECT(SDNode *N);
341     SDValue visitSELECT_CC(SDNode *N);
342     SDValue visitSETCC(SDNode *N);
343     SDValue visitSETCCCARRY(SDNode *N);
344     SDValue visitSIGN_EXTEND(SDNode *N);
345     SDValue visitZERO_EXTEND(SDNode *N);
346     SDValue visitANY_EXTEND(SDNode *N);
347     SDValue visitAssertExt(SDNode *N);
348     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
349     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
350     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
351     SDValue visitTRUNCATE(SDNode *N);
352     SDValue visitBITCAST(SDNode *N);
353     SDValue visitBUILD_PAIR(SDNode *N);
354     SDValue visitFADD(SDNode *N);
355     SDValue visitFSUB(SDNode *N);
356     SDValue visitFMUL(SDNode *N);
357     SDValue visitFMA(SDNode *N);
358     SDValue visitFDIV(SDNode *N);
359     SDValue visitFREM(SDNode *N);
360     SDValue visitFSQRT(SDNode *N);
361     SDValue visitFCOPYSIGN(SDNode *N);
362     SDValue visitFPOW(SDNode *N);
363     SDValue visitSINT_TO_FP(SDNode *N);
364     SDValue visitUINT_TO_FP(SDNode *N);
365     SDValue visitFP_TO_SINT(SDNode *N);
366     SDValue visitFP_TO_UINT(SDNode *N);
367     SDValue visitFP_ROUND(SDNode *N);
368     SDValue visitFP_ROUND_INREG(SDNode *N);
369     SDValue visitFP_EXTEND(SDNode *N);
370     SDValue visitFNEG(SDNode *N);
371     SDValue visitFABS(SDNode *N);
372     SDValue visitFCEIL(SDNode *N);
373     SDValue visitFTRUNC(SDNode *N);
374     SDValue visitFFLOOR(SDNode *N);
375     SDValue visitFMINNUM(SDNode *N);
376     SDValue visitFMAXNUM(SDNode *N);
377     SDValue visitFMINIMUM(SDNode *N);
378     SDValue visitFMAXIMUM(SDNode *N);
379     SDValue visitBRCOND(SDNode *N);
380     SDValue visitBR_CC(SDNode *N);
381     SDValue visitLOAD(SDNode *N);
382 
383     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
384     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
385 
386     SDValue visitSTORE(SDNode *N);
387     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
388     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
389     SDValue visitBUILD_VECTOR(SDNode *N);
390     SDValue visitCONCAT_VECTORS(SDNode *N);
391     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
392     SDValue visitVECTOR_SHUFFLE(SDNode *N);
393     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
394     SDValue visitINSERT_SUBVECTOR(SDNode *N);
395     SDValue visitMLOAD(SDNode *N);
396     SDValue visitMSTORE(SDNode *N);
397     SDValue visitMGATHER(SDNode *N);
398     SDValue visitMSCATTER(SDNode *N);
399     SDValue visitFP_TO_FP16(SDNode *N);
400     SDValue visitFP16_TO_FP(SDNode *N);
401 
402     SDValue visitFADDForFMACombine(SDNode *N);
403     SDValue visitFSUBForFMACombine(SDNode *N);
404     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
405 
406     SDValue XformToShuffleWithZero(SDNode *N);
407     SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
408                            SDValue N1, SDNodeFlags Flags);
409 
410     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
411 
412     SDValue foldSelectOfConstants(SDNode *N);
413     SDValue foldVSelectOfConstants(SDNode *N);
414     SDValue foldBinOpIntoSelect(SDNode *BO);
415     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
416     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
417     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
418     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
419                              SDValue N2, SDValue N3, ISD::CondCode CC,
420                              bool NotExtCompare = false);
421     SDValue convertSelectOfFPConstantsToLoadOffset(
422         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
423         ISD::CondCode CC);
424     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
425                                    SDValue N2, SDValue N3, ISD::CondCode CC);
426     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
427                               const SDLoc &DL);
428     SDValue unfoldMaskedMerge(SDNode *N);
429     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
430     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
431                           const SDLoc &DL, bool foldBooleans);
432     SDValue rebuildSetCC(SDValue N);
433 
434     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
435                            SDValue &CC) const;
436     bool isOneUseSetCC(SDValue N) const;
437 
438     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
439                                          unsigned HiOp);
440     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
441     SDValue CombineExtLoad(SDNode *N);
442     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
443     SDValue combineRepeatedFPDivisors(SDNode *N);
444     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
445     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
446     SDValue BuildSDIV(SDNode *N);
447     SDValue BuildSDIVPow2(SDNode *N);
448     SDValue BuildUDIV(SDNode *N);
449     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
450     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
451     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
452     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
453     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
454     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
455                                 SDNodeFlags Flags, bool Reciprocal);
456     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
457                                 SDNodeFlags Flags, bool Reciprocal);
458     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
459                                bool DemandHighBits = true);
460     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
461     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
462                               SDValue InnerPos, SDValue InnerNeg,
463                               unsigned PosOpcode, unsigned NegOpcode,
464                               const SDLoc &DL);
465     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
466     SDValue MatchLoadCombine(SDNode *N);
467     SDValue ReduceLoadWidth(SDNode *N);
468     SDValue ReduceLoadOpStoreWidth(SDNode *N);
469     SDValue splitMergedValStore(StoreSDNode *ST);
470     SDValue TransformFPLoadStorePair(SDNode *N);
471     SDValue convertBuildVecZextToZext(SDNode *N);
472     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
473     SDValue reduceBuildVecToShuffle(SDNode *N);
474     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
475                                   ArrayRef<int> VectorMask, SDValue VecIn1,
476                                   SDValue VecIn2, unsigned LeftIdx);
477     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
478 
479     /// Walk up chain skipping non-aliasing memory nodes,
480     /// looking for aliasing nodes and adding them to the Aliases vector.
481     void GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
482                           SmallVectorImpl<SDValue> &Aliases);
483 
484     /// Return true if there is any possibility that the two addresses overlap.
485     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
486 
487     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
488     /// chain (aliasing node.)
489     SDValue FindBetterChain(LSBaseSDNode *N, SDValue Chain);
490 
491     /// Try to replace a store and any possibly adjacent stores on
492     /// consecutive chains with better chains. Return true only if St is
493     /// replaced.
494     ///
495     /// Notice that other chains may still be replaced even if the function
496     /// returns false.
497     bool findBetterNeighborChains(StoreSDNode *St);
498 
499     // Helper for findBetterNeighborChains. Walk up store chain add additional
500     // chained stores that do not overlap and can be parallelized.
501     bool parallelizeChainedStores(StoreSDNode *St);
502 
503     /// Holds a pointer to an LSBaseSDNode as well as information on where it
504     /// is located in a sequence of memory operations connected by a chain.
505     struct MemOpLink {
506       // Ptr to the mem node.
507       LSBaseSDNode *MemNode;
508 
509       // Offset from the base ptr.
510       int64_t OffsetFromBase;
511 
512       MemOpLink(LSBaseSDNode *N, int64_t Offset)
513           : MemNode(N), OffsetFromBase(Offset) {}
514     };
515 
516     /// This is a helper function for visitMUL to check the profitability
517     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
518     /// MulNode is the original multiply, AddNode is (add x, c1),
519     /// and ConstNode is c2.
520     bool isMulAddWithConstProfitable(SDNode *MulNode,
521                                      SDValue &AddNode,
522                                      SDValue &ConstNode);
523 
524     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
525     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
526     /// the type of the loaded value to be extended.
527     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
528                           EVT LoadResultTy, EVT &ExtVT);
529 
530     /// Helper function to calculate whether the given Load/Store can have its
531     /// width reduced to ExtVT.
532     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
533                            EVT &MemVT, unsigned ShAmt = 0);
534 
535     /// Used by BackwardsPropagateMask to find suitable loads.
536     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
537                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
538                            ConstantSDNode *Mask, SDNode *&NodeToMask);
539     /// Attempt to propagate a given AND node back to load leaves so that they
540     /// can be combined into narrow loads.
541     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
542 
543     /// Helper function for MergeConsecutiveStores which merges the
544     /// component store chains.
545     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
546                                 unsigned NumStores);
547 
548     /// This is a helper function for MergeConsecutiveStores. When the
549     /// source elements of the consecutive stores are all constants or
550     /// all extracted vector elements, try to merge them into one
551     /// larger store introducing bitcasts if necessary.  \return True
552     /// if a merged store was created.
553     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
554                                          EVT MemVT, unsigned NumStores,
555                                          bool IsConstantSrc, bool UseVector,
556                                          bool UseTrunc);
557 
558     /// This is a helper function for MergeConsecutiveStores. Stores
559     /// that potentially may be merged with St are placed in
560     /// StoreNodes. RootNode is a chain predecessor to all store
561     /// candidates.
562     void getStoreMergeCandidates(StoreSDNode *St,
563                                  SmallVectorImpl<MemOpLink> &StoreNodes,
564                                  SDNode *&Root);
565 
566     /// Helper function for MergeConsecutiveStores. Checks if
567     /// candidate stores have indirect dependency through their
568     /// operands. RootNode is the predecessor to all stores calculated
569     /// by getStoreMergeCandidates and is used to prune the dependency check.
570     /// \return True if safe to merge.
571     bool checkMergeStoreCandidatesForDependencies(
572         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
573         SDNode *RootNode);
574 
575     /// Merge consecutive store operations into a wide store.
576     /// This optimization uses wide integers or vectors when possible.
577     /// \return number of stores that were merged into a merged store (the
578     /// affected nodes are stored as a prefix in \p StoreNodes).
579     bool MergeConsecutiveStores(StoreSDNode *St);
580 
581     /// Try to transform a truncation where C is a constant:
582     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
583     ///
584     /// \p N needs to be a truncation and its first operand an AND. Other
585     /// requirements are checked by the function (e.g. that trunc is
586     /// single-use) and if missed an empty SDValue is returned.
587     SDValue distributeTruncateThroughAnd(SDNode *N);
588 
589     /// Helper function to determine whether the target supports operation
590     /// given by \p Opcode for type \p VT, that is, whether the operation
591     /// is legal or custom before legalizing operations, and whether is
592     /// legal (but not custom) after legalization.
593     bool hasOperation(unsigned Opcode, EVT VT) {
594       if (LegalOperations)
595         return TLI.isOperationLegal(Opcode, VT);
596       return TLI.isOperationLegalOrCustom(Opcode, VT);
597     }
598 
599   public:
600     /// Runs the dag combiner on all nodes in the work list
601     void Run(CombineLevel AtLevel);
602 
603     SelectionDAG &getDAG() const { return DAG; }
604 
605     /// Returns a type large enough to hold any valid shift amount - before type
606     /// legalization these can be huge.
607     EVT getShiftAmountTy(EVT LHSTy) {
608       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
609       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
610     }
611 
612     /// This method returns true if we are running before type legalization or
613     /// if the specified VT is legal.
614     bool isTypeLegal(const EVT &VT) {
615       if (!LegalTypes) return true;
616       return TLI.isTypeLegal(VT);
617     }
618 
619     /// Convenience wrapper around TargetLowering::getSetCCResultType
620     EVT getSetCCResultType(EVT VT) const {
621       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
622     }
623 
624     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
625                          SDValue OrigLoad, SDValue ExtLoad,
626                          ISD::NodeType ExtType);
627   };
628 
629 /// This class is a DAGUpdateListener that removes any deleted
630 /// nodes from the worklist.
631 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
632   DAGCombiner &DC;
633 
634 public:
635   explicit WorklistRemover(DAGCombiner &dc)
636     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
637 
638   void NodeDeleted(SDNode *N, SDNode *E) override {
639     DC.removeFromWorklist(N);
640   }
641 };
642 
643 } // end anonymous namespace
644 
645 //===----------------------------------------------------------------------===//
646 //  TargetLowering::DAGCombinerInfo implementation
647 //===----------------------------------------------------------------------===//
648 
649 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
650   ((DAGCombiner*)DC)->AddToWorklist(N);
651 }
652 
653 SDValue TargetLowering::DAGCombinerInfo::
654 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
655   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
656 }
657 
658 SDValue TargetLowering::DAGCombinerInfo::
659 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
660   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
661 }
662 
663 SDValue TargetLowering::DAGCombinerInfo::
664 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
665   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
666 }
667 
668 void TargetLowering::DAGCombinerInfo::
669 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
670   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
671 }
672 
673 //===----------------------------------------------------------------------===//
674 // Helper Functions
675 //===----------------------------------------------------------------------===//
676 
677 void DAGCombiner::deleteAndRecombine(SDNode *N) {
678   removeFromWorklist(N);
679 
680   // If the operands of this node are only used by the node, they will now be
681   // dead. Make sure to re-visit them and recursively delete dead nodes.
682   for (const SDValue &Op : N->ops())
683     // For an operand generating multiple values, one of the values may
684     // become dead allowing further simplification (e.g. split index
685     // arithmetic from an indexed load).
686     if (Op->hasOneUse() || Op->getNumValues() > 1)
687       AddToWorklist(Op.getNode());
688 
689   DAG.DeleteNode(N);
690 }
691 
692 /// Return 1 if we can compute the negated form of the specified expression for
693 /// the same cost as the expression itself, or 2 if we can compute the negated
694 /// form more cheaply than the expression itself.
695 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
696                                const TargetLowering &TLI,
697                                const TargetOptions *Options,
698                                unsigned Depth = 0) {
699   // fneg is removable even if it has multiple uses.
700   if (Op.getOpcode() == ISD::FNEG) return 2;
701 
702   // Don't allow anything with multiple uses unless we know it is free.
703   EVT VT = Op.getValueType();
704   const SDNodeFlags Flags = Op->getFlags();
705   if (!Op.hasOneUse())
706     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
707           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
708       return 0;
709 
710   // Don't recurse exponentially.
711   if (Depth > 6) return 0;
712 
713   switch (Op.getOpcode()) {
714   default: return false;
715   case ISD::ConstantFP: {
716     if (!LegalOperations)
717       return 1;
718 
719     // Don't invert constant FP values after legalization unless the target says
720     // the negated constant is legal.
721     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
722       TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
723   }
724   case ISD::FADD:
725     if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
726       return 0;
727 
728     // After operation legalization, it might not be legal to create new FSUBs.
729     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
730       return 0;
731 
732     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
733     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
734                                     Options, Depth + 1))
735       return V;
736     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
737     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
738                               Depth + 1);
739   case ISD::FSUB:
740     // We can't turn -(A-B) into B-A when we honor signed zeros.
741     if (!Options->NoSignedZerosFPMath &&
742         !Flags.hasNoSignedZeros())
743       return 0;
744 
745     // fold (fneg (fsub A, B)) -> (fsub B, A)
746     return 1;
747 
748   case ISD::FMUL:
749   case ISD::FDIV:
750     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
751     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
752                                     Options, Depth + 1))
753       return V;
754 
755     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
756                               Depth + 1);
757 
758   case ISD::FP_EXTEND:
759   case ISD::FP_ROUND:
760   case ISD::FSIN:
761     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
762                               Depth + 1);
763   }
764 }
765 
766 /// If isNegatibleForFree returns true, return the newly negated expression.
767 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
768                                     bool LegalOperations, unsigned Depth = 0) {
769   const TargetOptions &Options = DAG.getTarget().Options;
770   // fneg is removable even if it has multiple uses.
771   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
772 
773   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
774 
775   const SDNodeFlags Flags = Op.getNode()->getFlags();
776 
777   switch (Op.getOpcode()) {
778   default: llvm_unreachable("Unknown code");
779   case ISD::ConstantFP: {
780     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
781     V.changeSign();
782     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
783   }
784   case ISD::FADD:
785     assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
786 
787     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
788     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
789                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
790       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
791                          GetNegatedExpression(Op.getOperand(0), DAG,
792                                               LegalOperations, Depth+1),
793                          Op.getOperand(1), Flags);
794     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
795     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
796                        GetNegatedExpression(Op.getOperand(1), DAG,
797                                             LegalOperations, Depth+1),
798                        Op.getOperand(0), Flags);
799   case ISD::FSUB:
800     // fold (fneg (fsub 0, B)) -> B
801     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
802       if (N0CFP->isZero())
803         return Op.getOperand(1);
804 
805     // fold (fneg (fsub A, B)) -> (fsub B, A)
806     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
807                        Op.getOperand(1), Op.getOperand(0), Flags);
808 
809   case ISD::FMUL:
810   case ISD::FDIV:
811     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
812     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
813                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
814       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
815                          GetNegatedExpression(Op.getOperand(0), DAG,
816                                               LegalOperations, Depth+1),
817                          Op.getOperand(1), Flags);
818 
819     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
820     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
821                        Op.getOperand(0),
822                        GetNegatedExpression(Op.getOperand(1), DAG,
823                                             LegalOperations, Depth+1), Flags);
824 
825   case ISD::FP_EXTEND:
826   case ISD::FSIN:
827     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
828                        GetNegatedExpression(Op.getOperand(0), DAG,
829                                             LegalOperations, Depth+1));
830   case ISD::FP_ROUND:
831       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
832                          GetNegatedExpression(Op.getOperand(0), DAG,
833                                               LegalOperations, Depth+1),
834                          Op.getOperand(1));
835   }
836 }
837 
838 // APInts must be the same size for most operations, this helper
839 // function zero extends the shorter of the pair so that they match.
840 // We provide an Offset so that we can create bitwidths that won't overflow.
841 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
842   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
843   LHS = LHS.zextOrSelf(Bits);
844   RHS = RHS.zextOrSelf(Bits);
845 }
846 
847 // Return true if this node is a setcc, or is a select_cc
848 // that selects between the target values used for true and false, making it
849 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
850 // the appropriate nodes based on the type of node we are checking. This
851 // simplifies life a bit for the callers.
852 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
853                                     SDValue &CC) const {
854   if (N.getOpcode() == ISD::SETCC) {
855     LHS = N.getOperand(0);
856     RHS = N.getOperand(1);
857     CC  = N.getOperand(2);
858     return true;
859   }
860 
861   if (N.getOpcode() != ISD::SELECT_CC ||
862       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
863       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
864     return false;
865 
866   if (TLI.getBooleanContents(N.getValueType()) ==
867       TargetLowering::UndefinedBooleanContent)
868     return false;
869 
870   LHS = N.getOperand(0);
871   RHS = N.getOperand(1);
872   CC  = N.getOperand(4);
873   return true;
874 }
875 
876 /// Return true if this is a SetCC-equivalent operation with only one use.
877 /// If this is true, it allows the users to invert the operation for free when
878 /// it is profitable to do so.
879 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
880   SDValue N0, N1, N2;
881   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
882     return true;
883   return false;
884 }
885 
886 // Returns the SDNode if it is a constant float BuildVector
887 // or constant float.
888 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
889   if (isa<ConstantFPSDNode>(N))
890     return N.getNode();
891   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
892     return N.getNode();
893   return nullptr;
894 }
895 
896 // Determines if it is a constant integer or a build vector of constant
897 // integers (and undefs).
898 // Do not permit build vector implicit truncation.
899 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
900   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
901     return !(Const->isOpaque() && NoOpaques);
902   if (N.getOpcode() != ISD::BUILD_VECTOR)
903     return false;
904   unsigned BitWidth = N.getScalarValueSizeInBits();
905   for (const SDValue &Op : N->op_values()) {
906     if (Op.isUndef())
907       continue;
908     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
909     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
910         (Const->isOpaque() && NoOpaques))
911       return false;
912   }
913   return true;
914 }
915 
916 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
917 // undef's.
918 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
919   if (V.getOpcode() != ISD::BUILD_VECTOR)
920     return false;
921   return isConstantOrConstantVector(V, NoOpaques) ||
922          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
923 }
924 
925 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
926                                     SDValue N1, SDNodeFlags Flags) {
927   // Don't reassociate reductions.
928   if (Flags.hasVectorReduction())
929     return SDValue();
930 
931   EVT VT = N0.getValueType();
932   if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
933     if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
934       if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
935         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
936         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
937           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
938         return SDValue();
939       }
940       if (N0.hasOneUse()) {
941         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
942         // use
943         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
944         if (!OpNode.getNode())
945           return SDValue();
946         AddToWorklist(OpNode.getNode());
947         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
948       }
949     }
950   }
951 
952   if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
953     if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
954       if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
955         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
956         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
957           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
958         return SDValue();
959       }
960       if (N1.hasOneUse()) {
961         // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
962         // use
963         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
964         if (!OpNode.getNode())
965           return SDValue();
966         AddToWorklist(OpNode.getNode());
967         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
968       }
969     }
970   }
971 
972   return SDValue();
973 }
974 
975 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
976                                bool AddTo) {
977   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
978   ++NodesCombined;
979   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
980              To[0].getNode()->dump(&DAG);
981              dbgs() << " and " << NumTo - 1 << " other values\n");
982   for (unsigned i = 0, e = NumTo; i != e; ++i)
983     assert((!To[i].getNode() ||
984             N->getValueType(i) == To[i].getValueType()) &&
985            "Cannot combine value to value of different type!");
986 
987   WorklistRemover DeadNodes(*this);
988   DAG.ReplaceAllUsesWith(N, To);
989   if (AddTo) {
990     // Push the new nodes and any users onto the worklist
991     for (unsigned i = 0, e = NumTo; i != e; ++i) {
992       if (To[i].getNode()) {
993         AddToWorklist(To[i].getNode());
994         AddUsersToWorklist(To[i].getNode());
995       }
996     }
997   }
998 
999   // Finally, if the node is now dead, remove it from the graph.  The node
1000   // may not be dead if the replacement process recursively simplified to
1001   // something else needing this node.
1002   if (N->use_empty())
1003     deleteAndRecombine(N);
1004   return SDValue(N, 0);
1005 }
1006 
1007 void DAGCombiner::
1008 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1009   // Replace all uses.  If any nodes become isomorphic to other nodes and
1010   // are deleted, make sure to remove them from our worklist.
1011   WorklistRemover DeadNodes(*this);
1012   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1013 
1014   // Push the new node and any (possibly new) users onto the worklist.
1015   AddToWorklist(TLO.New.getNode());
1016   AddUsersToWorklist(TLO.New.getNode());
1017 
1018   // Finally, if the node is now dead, remove it from the graph.  The node
1019   // may not be dead if the replacement process recursively simplified to
1020   // something else needing this node.
1021   if (TLO.Old.getNode()->use_empty())
1022     deleteAndRecombine(TLO.Old.getNode());
1023 }
1024 
1025 /// Check the specified integer node value to see if it can be simplified or if
1026 /// things it uses can be simplified by bit propagation. If so, return true.
1027 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1028   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1029   KnownBits Known;
1030   if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1031     return false;
1032 
1033   // Revisit the node.
1034   AddToWorklist(Op.getNode());
1035 
1036   // Replace the old value with the new one.
1037   ++NodesCombined;
1038   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1039              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1040              dbgs() << '\n');
1041 
1042   CommitTargetLoweringOpt(TLO);
1043   return true;
1044 }
1045 
1046 /// Check the specified vector node value to see if it can be simplified or
1047 /// if things it uses can be simplified as it only uses some of the elements.
1048 /// If so, return true.
1049 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
1050                                              bool AssumeSingleUse) {
1051   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1052   APInt KnownUndef, KnownZero;
1053   if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
1054                                       0, AssumeSingleUse))
1055     return false;
1056 
1057   // Revisit the node.
1058   AddToWorklist(Op.getNode());
1059 
1060   // Replace the old value with the new one.
1061   ++NodesCombined;
1062   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1063              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1064              dbgs() << '\n');
1065 
1066   CommitTargetLoweringOpt(TLO);
1067   return true;
1068 }
1069 
1070 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1071   SDLoc DL(Load);
1072   EVT VT = Load->getValueType(0);
1073   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1074 
1075   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1076              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1077   WorklistRemover DeadNodes(*this);
1078   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1079   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1080   deleteAndRecombine(Load);
1081   AddToWorklist(Trunc.getNode());
1082 }
1083 
1084 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1085   Replace = false;
1086   SDLoc DL(Op);
1087   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1088     LoadSDNode *LD = cast<LoadSDNode>(Op);
1089     EVT MemVT = LD->getMemoryVT();
1090     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1091                                                       : LD->getExtensionType();
1092     Replace = true;
1093     return DAG.getExtLoad(ExtType, DL, PVT,
1094                           LD->getChain(), LD->getBasePtr(),
1095                           MemVT, LD->getMemOperand());
1096   }
1097 
1098   unsigned Opc = Op.getOpcode();
1099   switch (Opc) {
1100   default: break;
1101   case ISD::AssertSext:
1102     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1103       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1104     break;
1105   case ISD::AssertZext:
1106     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1107       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1108     break;
1109   case ISD::Constant: {
1110     unsigned ExtOpc =
1111       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1112     return DAG.getNode(ExtOpc, DL, PVT, Op);
1113   }
1114   }
1115 
1116   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1117     return SDValue();
1118   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1119 }
1120 
1121 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1122   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1123     return SDValue();
1124   EVT OldVT = Op.getValueType();
1125   SDLoc DL(Op);
1126   bool Replace = false;
1127   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1128   if (!NewOp.getNode())
1129     return SDValue();
1130   AddToWorklist(NewOp.getNode());
1131 
1132   if (Replace)
1133     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1134   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1135                      DAG.getValueType(OldVT));
1136 }
1137 
1138 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1139   EVT OldVT = Op.getValueType();
1140   SDLoc DL(Op);
1141   bool Replace = false;
1142   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1143   if (!NewOp.getNode())
1144     return SDValue();
1145   AddToWorklist(NewOp.getNode());
1146 
1147   if (Replace)
1148     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1149   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1150 }
1151 
1152 /// Promote the specified integer binary operation if the target indicates it is
1153 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1154 /// i32 since i16 instructions are longer.
1155 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1156   if (!LegalOperations)
1157     return SDValue();
1158 
1159   EVT VT = Op.getValueType();
1160   if (VT.isVector() || !VT.isInteger())
1161     return SDValue();
1162 
1163   // If operation type is 'undesirable', e.g. i16 on x86, consider
1164   // promoting it.
1165   unsigned Opc = Op.getOpcode();
1166   if (TLI.isTypeDesirableForOp(Opc, VT))
1167     return SDValue();
1168 
1169   EVT PVT = VT;
1170   // Consult target whether it is a good idea to promote this operation and
1171   // what's the right type to promote it to.
1172   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1173     assert(PVT != VT && "Don't know what type to promote to!");
1174 
1175     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1176 
1177     bool Replace0 = false;
1178     SDValue N0 = Op.getOperand(0);
1179     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1180 
1181     bool Replace1 = false;
1182     SDValue N1 = Op.getOperand(1);
1183     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1184     SDLoc DL(Op);
1185 
1186     SDValue RV =
1187         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1188 
1189     // We are always replacing N0/N1's use in N and only need
1190     // additional replacements if there are additional uses.
1191     Replace0 &= !N0->hasOneUse();
1192     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1193 
1194     // Combine Op here so it is preserved past replacements.
1195     CombineTo(Op.getNode(), RV);
1196 
1197     // If operands have a use ordering, make sure we deal with
1198     // predecessor first.
1199     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1200       std::swap(N0, N1);
1201       std::swap(NN0, NN1);
1202     }
1203 
1204     if (Replace0) {
1205       AddToWorklist(NN0.getNode());
1206       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1207     }
1208     if (Replace1) {
1209       AddToWorklist(NN1.getNode());
1210       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1211     }
1212     return Op;
1213   }
1214   return SDValue();
1215 }
1216 
1217 /// Promote the specified integer shift operation if the target indicates it is
1218 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1219 /// i32 since i16 instructions are longer.
1220 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1221   if (!LegalOperations)
1222     return SDValue();
1223 
1224   EVT VT = Op.getValueType();
1225   if (VT.isVector() || !VT.isInteger())
1226     return SDValue();
1227 
1228   // If operation type is 'undesirable', e.g. i16 on x86, consider
1229   // promoting it.
1230   unsigned Opc = Op.getOpcode();
1231   if (TLI.isTypeDesirableForOp(Opc, VT))
1232     return SDValue();
1233 
1234   EVT PVT = VT;
1235   // Consult target whether it is a good idea to promote this operation and
1236   // what's the right type to promote it to.
1237   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1238     assert(PVT != VT && "Don't know what type to promote to!");
1239 
1240     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1241 
1242     bool Replace = false;
1243     SDValue N0 = Op.getOperand(0);
1244     SDValue N1 = Op.getOperand(1);
1245     if (Opc == ISD::SRA)
1246       N0 = SExtPromoteOperand(N0, PVT);
1247     else if (Opc == ISD::SRL)
1248       N0 = ZExtPromoteOperand(N0, PVT);
1249     else
1250       N0 = PromoteOperand(N0, PVT, Replace);
1251 
1252     if (!N0.getNode())
1253       return SDValue();
1254 
1255     SDLoc DL(Op);
1256     SDValue RV =
1257         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1258 
1259     AddToWorklist(N0.getNode());
1260     if (Replace)
1261       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1262 
1263     // Deal with Op being deleted.
1264     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1265       return RV;
1266   }
1267   return SDValue();
1268 }
1269 
1270 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1271   if (!LegalOperations)
1272     return SDValue();
1273 
1274   EVT VT = Op.getValueType();
1275   if (VT.isVector() || !VT.isInteger())
1276     return SDValue();
1277 
1278   // If operation type is 'undesirable', e.g. i16 on x86, consider
1279   // promoting it.
1280   unsigned Opc = Op.getOpcode();
1281   if (TLI.isTypeDesirableForOp(Opc, VT))
1282     return SDValue();
1283 
1284   EVT PVT = VT;
1285   // Consult target whether it is a good idea to promote this operation and
1286   // what's the right type to promote it to.
1287   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1288     assert(PVT != VT && "Don't know what type to promote to!");
1289     // fold (aext (aext x)) -> (aext x)
1290     // fold (aext (zext x)) -> (zext x)
1291     // fold (aext (sext x)) -> (sext x)
1292     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1293     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1294   }
1295   return SDValue();
1296 }
1297 
1298 bool DAGCombiner::PromoteLoad(SDValue Op) {
1299   if (!LegalOperations)
1300     return false;
1301 
1302   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1303     return false;
1304 
1305   EVT VT = Op.getValueType();
1306   if (VT.isVector() || !VT.isInteger())
1307     return false;
1308 
1309   // If operation type is 'undesirable', e.g. i16 on x86, consider
1310   // promoting it.
1311   unsigned Opc = Op.getOpcode();
1312   if (TLI.isTypeDesirableForOp(Opc, VT))
1313     return false;
1314 
1315   EVT PVT = VT;
1316   // Consult target whether it is a good idea to promote this operation and
1317   // what's the right type to promote it to.
1318   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1319     assert(PVT != VT && "Don't know what type to promote to!");
1320 
1321     SDLoc DL(Op);
1322     SDNode *N = Op.getNode();
1323     LoadSDNode *LD = cast<LoadSDNode>(N);
1324     EVT MemVT = LD->getMemoryVT();
1325     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1326                                                       : LD->getExtensionType();
1327     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1328                                    LD->getChain(), LD->getBasePtr(),
1329                                    MemVT, LD->getMemOperand());
1330     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1331 
1332     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1333                Result.getNode()->dump(&DAG); dbgs() << '\n');
1334     WorklistRemover DeadNodes(*this);
1335     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1336     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1337     deleteAndRecombine(N);
1338     AddToWorklist(Result.getNode());
1339     return true;
1340   }
1341   return false;
1342 }
1343 
1344 /// Recursively delete a node which has no uses and any operands for
1345 /// which it is the only use.
1346 ///
1347 /// Note that this both deletes the nodes and removes them from the worklist.
1348 /// It also adds any nodes who have had a user deleted to the worklist as they
1349 /// may now have only one use and subject to other combines.
1350 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1351   if (!N->use_empty())
1352     return false;
1353 
1354   SmallSetVector<SDNode *, 16> Nodes;
1355   Nodes.insert(N);
1356   do {
1357     N = Nodes.pop_back_val();
1358     if (!N)
1359       continue;
1360 
1361     if (N->use_empty()) {
1362       for (const SDValue &ChildN : N->op_values())
1363         Nodes.insert(ChildN.getNode());
1364 
1365       removeFromWorklist(N);
1366       DAG.DeleteNode(N);
1367     } else {
1368       AddToWorklist(N);
1369     }
1370   } while (!Nodes.empty());
1371   return true;
1372 }
1373 
1374 //===----------------------------------------------------------------------===//
1375 //  Main DAG Combiner implementation
1376 //===----------------------------------------------------------------------===//
1377 
1378 void DAGCombiner::Run(CombineLevel AtLevel) {
1379   // set the instance variables, so that the various visit routines may use it.
1380   Level = AtLevel;
1381   LegalOperations = Level >= AfterLegalizeVectorOps;
1382   LegalTypes = Level >= AfterLegalizeTypes;
1383 
1384   // Add all the dag nodes to the worklist.
1385   for (SDNode &Node : DAG.allnodes())
1386     AddToWorklist(&Node);
1387 
1388   // Create a dummy node (which is not added to allnodes), that adds a reference
1389   // to the root node, preventing it from being deleted, and tracking any
1390   // changes of the root.
1391   HandleSDNode Dummy(DAG.getRoot());
1392 
1393   // While the worklist isn't empty, find a node and try to combine it.
1394   while (!WorklistMap.empty()) {
1395     SDNode *N;
1396     // The Worklist holds the SDNodes in order, but it may contain null entries.
1397     do {
1398       N = Worklist.pop_back_val();
1399     } while (!N);
1400 
1401     bool GoodWorklistEntry = WorklistMap.erase(N);
1402     (void)GoodWorklistEntry;
1403     assert(GoodWorklistEntry &&
1404            "Found a worklist entry without a corresponding map entry!");
1405 
1406     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1407     // N is deleted from the DAG, since they too may now be dead or may have a
1408     // reduced number of uses, allowing other xforms.
1409     if (recursivelyDeleteUnusedNodes(N))
1410       continue;
1411 
1412     WorklistRemover DeadNodes(*this);
1413 
1414     // If this combine is running after legalizing the DAG, re-legalize any
1415     // nodes pulled off the worklist.
1416     if (Level == AfterLegalizeDAG) {
1417       SmallSetVector<SDNode *, 16> UpdatedNodes;
1418       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1419 
1420       for (SDNode *LN : UpdatedNodes) {
1421         AddToWorklist(LN);
1422         AddUsersToWorklist(LN);
1423       }
1424       if (!NIsValid)
1425         continue;
1426     }
1427 
1428     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1429 
1430     // Add any operands of the new node which have not yet been combined to the
1431     // worklist as well. Because the worklist uniques things already, this
1432     // won't repeatedly process the same operand.
1433     CombinedNodes.insert(N);
1434     for (const SDValue &ChildN : N->op_values())
1435       if (!CombinedNodes.count(ChildN.getNode()))
1436         AddToWorklist(ChildN.getNode());
1437 
1438     SDValue RV = combine(N);
1439 
1440     if (!RV.getNode())
1441       continue;
1442 
1443     ++NodesCombined;
1444 
1445     // If we get back the same node we passed in, rather than a new node or
1446     // zero, we know that the node must have defined multiple values and
1447     // CombineTo was used.  Since CombineTo takes care of the worklist
1448     // mechanics for us, we have no work to do in this case.
1449     if (RV.getNode() == N)
1450       continue;
1451 
1452     assert(N->getOpcode() != ISD::DELETED_NODE &&
1453            RV.getOpcode() != ISD::DELETED_NODE &&
1454            "Node was deleted but visit returned new node!");
1455 
1456     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1457 
1458     if (N->getNumValues() == RV.getNode()->getNumValues())
1459       DAG.ReplaceAllUsesWith(N, RV.getNode());
1460     else {
1461       assert(N->getValueType(0) == RV.getValueType() &&
1462              N->getNumValues() == 1 && "Type mismatch");
1463       DAG.ReplaceAllUsesWith(N, &RV);
1464     }
1465 
1466     // Push the new node and any users onto the worklist
1467     AddToWorklist(RV.getNode());
1468     AddUsersToWorklist(RV.getNode());
1469 
1470     // Finally, if the node is now dead, remove it from the graph.  The node
1471     // may not be dead if the replacement process recursively simplified to
1472     // something else needing this node. This will also take care of adding any
1473     // operands which have lost a user to the worklist.
1474     recursivelyDeleteUnusedNodes(N);
1475   }
1476 
1477   // If the root changed (e.g. it was a dead load, update the root).
1478   DAG.setRoot(Dummy.getValue());
1479   DAG.RemoveDeadNodes();
1480 }
1481 
1482 SDValue DAGCombiner::visit(SDNode *N) {
1483   switch (N->getOpcode()) {
1484   default: break;
1485   case ISD::TokenFactor:        return visitTokenFactor(N);
1486   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1487   case ISD::ADD:                return visitADD(N);
1488   case ISD::SUB:                return visitSUB(N);
1489   case ISD::SADDSAT:
1490   case ISD::UADDSAT:            return visitADDSAT(N);
1491   case ISD::SSUBSAT:
1492   case ISD::USUBSAT:            return visitSUBSAT(N);
1493   case ISD::ADDC:               return visitADDC(N);
1494   case ISD::SADDO:
1495   case ISD::UADDO:              return visitADDO(N);
1496   case ISD::SUBC:               return visitSUBC(N);
1497   case ISD::SSUBO:
1498   case ISD::USUBO:              return visitSUBO(N);
1499   case ISD::ADDE:               return visitADDE(N);
1500   case ISD::ADDCARRY:           return visitADDCARRY(N);
1501   case ISD::SUBE:               return visitSUBE(N);
1502   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1503   case ISD::MUL:                return visitMUL(N);
1504   case ISD::SDIV:               return visitSDIV(N);
1505   case ISD::UDIV:               return visitUDIV(N);
1506   case ISD::SREM:
1507   case ISD::UREM:               return visitREM(N);
1508   case ISD::MULHU:              return visitMULHU(N);
1509   case ISD::MULHS:              return visitMULHS(N);
1510   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1511   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1512   case ISD::SMULO:
1513   case ISD::UMULO:              return visitMULO(N);
1514   case ISD::SMIN:
1515   case ISD::SMAX:
1516   case ISD::UMIN:
1517   case ISD::UMAX:               return visitIMINMAX(N);
1518   case ISD::AND:                return visitAND(N);
1519   case ISD::OR:                 return visitOR(N);
1520   case ISD::XOR:                return visitXOR(N);
1521   case ISD::SHL:                return visitSHL(N);
1522   case ISD::SRA:                return visitSRA(N);
1523   case ISD::SRL:                return visitSRL(N);
1524   case ISD::ROTR:
1525   case ISD::ROTL:               return visitRotate(N);
1526   case ISD::FSHL:
1527   case ISD::FSHR:               return visitFunnelShift(N);
1528   case ISD::ABS:                return visitABS(N);
1529   case ISD::BSWAP:              return visitBSWAP(N);
1530   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1531   case ISD::CTLZ:               return visitCTLZ(N);
1532   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1533   case ISD::CTTZ:               return visitCTTZ(N);
1534   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1535   case ISD::CTPOP:              return visitCTPOP(N);
1536   case ISD::SELECT:             return visitSELECT(N);
1537   case ISD::VSELECT:            return visitVSELECT(N);
1538   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1539   case ISD::SETCC:              return visitSETCC(N);
1540   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1541   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1542   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1543   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1544   case ISD::AssertSext:
1545   case ISD::AssertZext:         return visitAssertExt(N);
1546   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1547   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1548   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1549   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1550   case ISD::BITCAST:            return visitBITCAST(N);
1551   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1552   case ISD::FADD:               return visitFADD(N);
1553   case ISD::FSUB:               return visitFSUB(N);
1554   case ISD::FMUL:               return visitFMUL(N);
1555   case ISD::FMA:                return visitFMA(N);
1556   case ISD::FDIV:               return visitFDIV(N);
1557   case ISD::FREM:               return visitFREM(N);
1558   case ISD::FSQRT:              return visitFSQRT(N);
1559   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1560   case ISD::FPOW:               return visitFPOW(N);
1561   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1562   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1563   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1564   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1565   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1566   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1567   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1568   case ISD::FNEG:               return visitFNEG(N);
1569   case ISD::FABS:               return visitFABS(N);
1570   case ISD::FFLOOR:             return visitFFLOOR(N);
1571   case ISD::FMINNUM:            return visitFMINNUM(N);
1572   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1573   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1574   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1575   case ISD::FCEIL:              return visitFCEIL(N);
1576   case ISD::FTRUNC:             return visitFTRUNC(N);
1577   case ISD::BRCOND:             return visitBRCOND(N);
1578   case ISD::BR_CC:              return visitBR_CC(N);
1579   case ISD::LOAD:               return visitLOAD(N);
1580   case ISD::STORE:              return visitSTORE(N);
1581   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1582   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1583   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1584   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1585   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1586   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1587   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1588   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1589   case ISD::MGATHER:            return visitMGATHER(N);
1590   case ISD::MLOAD:              return visitMLOAD(N);
1591   case ISD::MSCATTER:           return visitMSCATTER(N);
1592   case ISD::MSTORE:             return visitMSTORE(N);
1593   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1594   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1595   }
1596   return SDValue();
1597 }
1598 
1599 SDValue DAGCombiner::combine(SDNode *N) {
1600   SDValue RV = visit(N);
1601 
1602   // If nothing happened, try a target-specific DAG combine.
1603   if (!RV.getNode()) {
1604     assert(N->getOpcode() != ISD::DELETED_NODE &&
1605            "Node was deleted but visit returned NULL!");
1606 
1607     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1608         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1609 
1610       // Expose the DAG combiner to the target combiner impls.
1611       TargetLowering::DAGCombinerInfo
1612         DagCombineInfo(DAG, Level, false, this);
1613 
1614       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1615     }
1616   }
1617 
1618   // If nothing happened still, try promoting the operation.
1619   if (!RV.getNode()) {
1620     switch (N->getOpcode()) {
1621     default: break;
1622     case ISD::ADD:
1623     case ISD::SUB:
1624     case ISD::MUL:
1625     case ISD::AND:
1626     case ISD::OR:
1627     case ISD::XOR:
1628       RV = PromoteIntBinOp(SDValue(N, 0));
1629       break;
1630     case ISD::SHL:
1631     case ISD::SRA:
1632     case ISD::SRL:
1633       RV = PromoteIntShiftOp(SDValue(N, 0));
1634       break;
1635     case ISD::SIGN_EXTEND:
1636     case ISD::ZERO_EXTEND:
1637     case ISD::ANY_EXTEND:
1638       RV = PromoteExtend(SDValue(N, 0));
1639       break;
1640     case ISD::LOAD:
1641       if (PromoteLoad(SDValue(N, 0)))
1642         RV = SDValue(N, 0);
1643       break;
1644     }
1645   }
1646 
1647   // If N is a commutative binary node, try eliminate it if the commuted
1648   // version is already present in the DAG.
1649   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1650       N->getNumValues() == 1) {
1651     SDValue N0 = N->getOperand(0);
1652     SDValue N1 = N->getOperand(1);
1653 
1654     // Constant operands are canonicalized to RHS.
1655     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1656       SDValue Ops[] = {N1, N0};
1657       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1658                                             N->getFlags());
1659       if (CSENode)
1660         return SDValue(CSENode, 0);
1661     }
1662   }
1663 
1664   return RV;
1665 }
1666 
1667 /// Given a node, return its input chain if it has one, otherwise return a null
1668 /// sd operand.
1669 static SDValue getInputChainForNode(SDNode *N) {
1670   if (unsigned NumOps = N->getNumOperands()) {
1671     if (N->getOperand(0).getValueType() == MVT::Other)
1672       return N->getOperand(0);
1673     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1674       return N->getOperand(NumOps-1);
1675     for (unsigned i = 1; i < NumOps-1; ++i)
1676       if (N->getOperand(i).getValueType() == MVT::Other)
1677         return N->getOperand(i);
1678   }
1679   return SDValue();
1680 }
1681 
/// Simplify the TokenFactor \p N. In order, this:
///   - for a two-operand token factor, returns one operand directly when the
///     other operand is that operand's own input chain;
///   - (skipped at CodeGenOpt::None) flattens nested single-use token factors
///     into one operand list, dropping EntryToken and duplicate operands;
///   - prunes operands that are reached by walking up the chain of another
///     operand (the walk is bounded to 1024 worklist entries).
/// Returns the replacement value, or a null SDValue if nothing changed.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already present in Ops.
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered, so the bound is re-read on every iteration.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        // A nested token factor can be merged into this one only if nothing
        // else uses it and it has not been queued already.
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Otherwise treat it like any other operand.
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list; a duplicate means the
        // rebuilt token factor will be smaller than the original.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another
  // operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes reached by the walk.
  bool DidPruneOps = false;

  // Seed the search with every operand; operand k gets op number k and one
  // pending worklist entry. NumLeftToConsider ends up equal to Ops.size().
  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Record that chain node Op was reached while searching on behalf of operand
  // OpNumber, from worklist position CurIdx.
  // Note: from here on this lambda hides the one-argument AddToWorklist that
  // is called earlier in this function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      // Linear search for the position of Op within Ops.
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark the not-yet-processed worklist entries from OrigOpNumber to
      // OpNumber.
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      // The pruned operand's outstanding work now belongs to OpNumber.
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // Process the worklist in insertion order, giving up after 1024 entries.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to be considering at least 2 Ops to be able to prune anything.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      // Follow every incoming chain of the token factor.
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // Operand 0 is followed as the chain for these nodes.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory nodes expose their chain directly; the walk stops at any other
      // kind of node.
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    // This worklist entry is done; once an operand has no outstanding work it
    // no longer needs to be considered.
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached through the chain walk
        // above.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1843 
1844 /// MERGE_VALUES can always be eliminated.
1845 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1846   WorklistRemover DeadNodes(*this);
1847   // Replacing results may cause a different MERGE_VALUES to suddenly
1848   // be CSE'd with N, and carry its uses with it. Iterate until no
1849   // uses remain, to ensure that the node can be safely deleted.
1850   // First add the users of this node to the work list so that they
1851   // can be tried again once they have new operands.
1852   AddUsersToWorklist(N);
1853   do {
1854     // Do as a single replacement to avoid rewalking use lists.
1855     SmallVector<SDValue, 8> Ops;
1856     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1857       Ops.push_back(N->getOperand(i));
1858     DAG.ReplaceAllUsesWith(N, Ops.data());
1859   } while (!N->use_empty());
1860   deleteAndRecombine(N);
1861   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1862 }
1863 
1864 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1865 /// ConstantSDNode pointer else nullptr.
1866 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1867   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1868   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1869 }
1870 
1871 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1872   assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1873 
1874   // Don't do this unless the old select is going away. We want to eliminate the
1875   // binary operator, not replace a binop with a select.
1876   // TODO: Handle ISD::SELECT_CC.
1877   unsigned SelOpNo = 0;
1878   SDValue Sel = BO->getOperand(0);
1879   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1880     SelOpNo = 1;
1881     Sel = BO->getOperand(1);
1882   }
1883 
1884   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1885     return SDValue();
1886 
1887   SDValue CT = Sel.getOperand(1);
1888   if (!isConstantOrConstantVector(CT, true) &&
1889       !isConstantFPBuildVectorOrConstantFP(CT))
1890     return SDValue();
1891 
1892   SDValue CF = Sel.getOperand(2);
1893   if (!isConstantOrConstantVector(CF, true) &&
1894       !isConstantFPBuildVectorOrConstantFP(CF))
1895     return SDValue();
1896 
1897   // Bail out if any constants are opaque because we can't constant fold those.
1898   // The exception is "and" and "or" with either 0 or -1 in which case we can
1899   // propagate non constant operands into select. I.e.:
1900   // and (select Cond, 0, -1), X --> select Cond, 0, X
1901   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1902   auto BinOpcode = BO->getOpcode();
1903   bool CanFoldNonConst =
1904       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1905       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1906       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1907 
1908   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1909   if (!CanFoldNonConst &&
1910       !isConstantOrConstantVector(CBO, true) &&
1911       !isConstantFPBuildVectorOrConstantFP(CBO))
1912     return SDValue();
1913 
1914   EVT VT = Sel.getValueType();
1915 
1916   // In case of shift value and shift amount may have different VT. For instance
1917   // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1918   // swapped operands and value types do not match. NB: x86 is fine if operands
1919   // are not swapped with shift amount VT being not bigger than shifted value.
1920   // TODO: that is possible to check for a shift operation, correct VTs and
1921   // still perform optimization on x86 if needed.
1922   if (SelOpNo && VT != CBO.getValueType())
1923     return SDValue();
1924 
1925   // We have a select-of-constants followed by a binary operator with a
1926   // constant. Eliminate the binop by pulling the constant math into the select.
1927   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1928   SDLoc DL(Sel);
1929   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1930                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1931   if (!CanFoldNonConst && !NewCT.isUndef() &&
1932       !isConstantOrConstantVector(NewCT, true) &&
1933       !isConstantFPBuildVectorOrConstantFP(NewCT))
1934     return SDValue();
1935 
1936   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1937                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1938   if (!CanFoldNonConst && !NewCF.isUndef() &&
1939       !isConstantOrConstantVector(NewCF, true) &&
1940       !isConstantFPBuildVectorOrConstantFP(NewCF))
1941     return SDValue();
1942 
1943   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1944 }
1945 
1946 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1947   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1948          "Expecting add or sub");
1949 
1950   // Match a constant operand and a zext operand for the math instruction:
1951   // add Z, C
1952   // sub C, Z
1953   bool IsAdd = N->getOpcode() == ISD::ADD;
1954   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
1955   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
1956   auto *CN = dyn_cast<ConstantSDNode>(C);
1957   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
1958     return SDValue();
1959 
1960   // Match the zext operand as a setcc of a boolean.
1961   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
1962       Z.getOperand(0).getValueType() != MVT::i1)
1963     return SDValue();
1964 
1965   // Match the compare as: setcc (X & 1), 0, eq.
1966   SDValue SetCC = Z.getOperand(0);
1967   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
1968   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
1969       SetCC.getOperand(0).getOpcode() != ISD::AND ||
1970       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
1971     return SDValue();
1972 
1973   // We are adding/subtracting a constant and an inverted low bit. Turn that
1974   // into a subtract/add of the low bit with incremented/decremented constant:
1975   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
1976   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
1977   EVT VT = C.getValueType();
1978   SDLoc DL(N);
1979   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
1980   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
1981                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
1982   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
1983 }
1984 
1985 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
1986 /// a shift and add with a different constant.
1987 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
1988   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1989          "Expecting add or sub");
1990 
1991   // We need a constant operand for the add/sub, and the other operand is a
1992   // logical shift right: add (srl), C or sub C, (srl).
1993   bool IsAdd = N->getOpcode() == ISD::ADD;
1994   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
1995   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
1996   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
1997   if (!C || ShiftOp.getOpcode() != ISD::SRL)
1998     return SDValue();
1999 
2000   // The shift must be of a 'not' value.
2001   SDValue Not = ShiftOp.getOperand(0);
2002   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2003     return SDValue();
2004 
2005   // The shift must be moving the sign bit to the least-significant-bit.
2006   EVT VT = ShiftOp.getValueType();
2007   SDValue ShAmt = ShiftOp.getOperand(1);
2008   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2009   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2010     return SDValue();
2011 
2012   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2013   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2014   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2015   SDLoc DL(N);
2016   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2017   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2018   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2019   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2020 }
2021 
2022 SDValue DAGCombiner::visitADD(SDNode *N) {
2023   SDValue N0 = N->getOperand(0);
2024   SDValue N1 = N->getOperand(1);
2025   EVT VT = N0.getValueType();
2026   SDLoc DL(N);
2027 
2028   // fold vector ops
2029   if (VT.isVector()) {
2030     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2031       return FoldedVOp;
2032 
2033     // fold (add x, 0) -> x, vector edition
2034     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2035       return N0;
2036     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2037       return N1;
2038   }
2039 
2040   // fold (add x, undef) -> undef
2041   if (N0.isUndef())
2042     return N0;
2043 
2044   if (N1.isUndef())
2045     return N1;
2046 
2047   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2048     // canonicalize constant to RHS
2049     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2050       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2051     // fold (add c1, c2) -> c1+c2
2052     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2053                                       N1.getNode());
2054   }
2055 
2056   // fold (add x, 0) -> x
2057   if (isNullConstant(N1))
2058     return N0;
2059 
2060   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2061     // fold ((c1-A)+c2) -> (c1+c2)-A
2062     if (N0.getOpcode() == ISD::SUB &&
2063         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2064       // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2065       return DAG.getNode(ISD::SUB, DL, VT,
2066                          DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2067                          N0.getOperand(1));
2068     }
2069 
2070     // add (sext i1 X), 1 -> zext (not i1 X)
2071     // We don't transform this pattern:
2072     //   add (zext i1 X), -1 -> sext (not i1 X)
2073     // because most (?) targets generate better code for the zext form.
2074     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2075         isOneOrOneSplat(N1)) {
2076       SDValue X = N0.getOperand(0);
2077       if ((!LegalOperations ||
2078            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2079             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2080           X.getScalarValueSizeInBits() == 1) {
2081         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2082         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2083       }
2084     }
2085 
2086     // Undo the add -> or combine to merge constant offsets from a frame index.
2087     if (N0.getOpcode() == ISD::OR &&
2088         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2089         isa<ConstantSDNode>(N0.getOperand(1)) &&
2090         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2091       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2092       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2093     }
2094   }
2095 
2096   if (SDValue NewSel = foldBinOpIntoSelect(N))
2097     return NewSel;
2098 
2099   // reassociate add
2100   if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2101     return RADD;
2102 
2103   // fold ((0-A) + B) -> B-A
2104   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2105     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2106 
2107   // fold (A + (0-B)) -> A-B
2108   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2109     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2110 
2111   // fold (A+(B-A)) -> B
2112   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2113     return N1.getOperand(0);
2114 
2115   // fold ((B-A)+A) -> B
2116   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2117     return N0.getOperand(0);
2118 
2119   // fold ((A-B)+(C-A)) -> (C-B)
2120   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2121       N0.getOperand(0) == N1.getOperand(1))
2122     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2123                        N0.getOperand(1));
2124 
2125   // fold ((A-B)+(B-C)) -> (A-C)
2126   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2127       N0.getOperand(1) == N1.getOperand(0))
2128     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2129                        N1.getOperand(1));
2130 
2131   // fold (A+(B-(A+C))) to (B-C)
2132   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2133       N0 == N1.getOperand(1).getOperand(0))
2134     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2135                        N1.getOperand(1).getOperand(1));
2136 
2137   // fold (A+(B-(C+A))) to (B-C)
2138   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2139       N0 == N1.getOperand(1).getOperand(1))
2140     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2141                        N1.getOperand(1).getOperand(0));
2142 
2143   // fold (A+((B-A)+or-C)) to (B+or-C)
2144   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2145       N1.getOperand(0).getOpcode() == ISD::SUB &&
2146       N0 == N1.getOperand(0).getOperand(1))
2147     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2148                        N1.getOperand(1));
2149 
2150   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2151   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2152     SDValue N00 = N0.getOperand(0);
2153     SDValue N01 = N0.getOperand(1);
2154     SDValue N10 = N1.getOperand(0);
2155     SDValue N11 = N1.getOperand(1);
2156 
2157     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2158       return DAG.getNode(ISD::SUB, DL, VT,
2159                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2160                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2161   }
2162 
2163   // fold (add (umax X, C), -C) --> (usubsat X, C)
2164   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2165     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2166       return (!Max && !Op) ||
2167              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2168     };
2169     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2170                                   /*AllowUndefs*/ true))
2171       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2172                          N0.getOperand(1));
2173   }
2174 
2175   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2176     return V;
2177 
2178   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2179     return V;
2180 
2181   if (SimplifyDemandedBits(SDValue(N, 0)))
2182     return SDValue(N, 0);
2183 
2184   // fold (a+b) -> (a|b) iff a and b share no bits.
2185   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2186       DAG.haveNoCommonBitsSet(N0, N1))
2187     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2188 
2189   if (isOneOrOneSplat(N1)) {
2190     // fold (add (xor a, -1), 1) -> (sub 0, a)
2191     if (isBitwiseNot(N0))
2192       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2193                          N0.getOperand(0));
2194 
2195     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2196     if (N0.getOpcode() == ISD::ADD ||
2197         N0.getOpcode() == ISD::UADDO ||
2198         N0.getOpcode() == ISD::SADDO) {
2199       SDValue A, Xor;
2200 
2201       if (isBitwiseNot(N0.getOperand(0))) {
2202         A = N0.getOperand(1);
2203         Xor = N0.getOperand(0);
2204       } else if (isBitwiseNot(N0.getOperand(1))) {
2205         A = N0.getOperand(0);
2206         Xor = N0.getOperand(1);
2207       }
2208 
2209       if (Xor)
2210         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2211     }
2212   }
2213 
2214   if (SDValue Combined = visitADDLike(N0, N1, N))
2215     return Combined;
2216 
2217   if (SDValue Combined = visitADDLike(N1, N0, N))
2218     return Combined;
2219 
2220   return SDValue();
2221 }
2222 
2223 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2224   unsigned Opcode = N->getOpcode();
2225   SDValue N0 = N->getOperand(0);
2226   SDValue N1 = N->getOperand(1);
2227   EVT VT = N0.getValueType();
2228   SDLoc DL(N);
2229 
2230   // fold vector ops
2231   if (VT.isVector()) {
2232     // TODO SimplifyVBinOp
2233 
2234     // fold (add_sat x, 0) -> x, vector edition
2235     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2236       return N0;
2237     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2238       return N1;
2239   }
2240 
2241   // fold (add_sat x, undef) -> -1
2242   if (N0.isUndef() || N1.isUndef())
2243     return DAG.getAllOnesConstant(DL, VT);
2244 
2245   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2246     // canonicalize constant to RHS
2247     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2248       return DAG.getNode(Opcode, DL, VT, N1, N0);
2249     // fold (add_sat c1, c2) -> c3
2250     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2251                                       N1.getNode());
2252   }
2253 
2254   // fold (add_sat x, 0) -> x
2255   if (isNullConstant(N1))
2256     return N0;
2257 
2258   // If it cannot overflow, transform into an add.
2259   if (Opcode == ISD::UADDSAT)
2260     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2261       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2262 
2263   return SDValue();
2264 }
2265 
2266 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2267   bool Masked = false;
2268 
2269   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2270   while (true) {
2271     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2272       V = V.getOperand(0);
2273       continue;
2274     }
2275 
2276     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2277       Masked = true;
2278       V = V.getOperand(0);
2279       continue;
2280     }
2281 
2282     break;
2283   }
2284 
2285   // If this is not a carry, return.
2286   if (V.getResNo() != 1)
2287     return SDValue();
2288 
2289   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2290       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2291     return SDValue();
2292 
2293   // If the result is masked, then no matter what kind of bool it is we can
2294   // return. If it isn't, then we need to make sure the bool type is either 0 or
2295   // 1 and not other values.
2296   if (Masked ||
2297       TLI.getBooleanContents(V.getValueType()) ==
2298           TargetLoweringBase::ZeroOrOneBooleanContent)
2299     return V;
2300 
2301   return SDValue();
2302 }
2303 
2304 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2305 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2306 /// the opcode and bypass the mask operation.
2307 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2308                                  SelectionDAG &DAG, const SDLoc &DL) {
2309   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2310     return SDValue();
2311 
2312   EVT VT = N0.getValueType();
2313   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2314     return SDValue();
2315 
2316   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2317   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2318   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2319 }
2320 
2321 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2322   EVT VT = N0.getValueType();
2323   SDLoc DL(LocReference);
2324 
2325   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2326   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2327       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2328     return DAG.getNode(ISD::SUB, DL, VT, N0,
2329                        DAG.getNode(ISD::SHL, DL, VT,
2330                                    N1.getOperand(0).getOperand(1),
2331                                    N1.getOperand(1)));
2332 
2333   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2334     return V;
2335 
2336   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2337   // rather than 'add 0/-1' (the zext should get folded).
2338   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2339   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2340       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2341       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2342     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2343     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2344   }
2345 
2346   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2347   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2348     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2349     if (TN->getVT() == MVT::i1) {
2350       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2351                                  DAG.getConstant(1, DL, VT));
2352       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2353     }
2354   }
2355 
2356   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2357   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2358       N1.getResNo() == 0)
2359     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2360                        N0, N1.getOperand(0), N1.getOperand(2));
2361 
2362   // (add X, Carry) -> (addcarry X, 0, Carry)
2363   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2364     if (SDValue Carry = getAsCarry(TLI, N1))
2365       return DAG.getNode(ISD::ADDCARRY, DL,
2366                          DAG.getVTList(VT, Carry.getValueType()), N0,
2367                          DAG.getConstant(0, DL, VT), Carry);
2368 
2369   return SDValue();
2370 }
2371 
2372 SDValue DAGCombiner::visitADDC(SDNode *N) {
2373   SDValue N0 = N->getOperand(0);
2374   SDValue N1 = N->getOperand(1);
2375   EVT VT = N0.getValueType();
2376   SDLoc DL(N);
2377 
2378   // If the flag result is dead, turn this into an ADD.
2379   if (!N->hasAnyUseOfValue(1))
2380     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2381                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2382 
2383   // canonicalize constant to RHS.
2384   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2385   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2386   if (N0C && !N1C)
2387     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2388 
2389   // fold (addc x, 0) -> x + no carry out
2390   if (isNullConstant(N1))
2391     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2392                                         DL, MVT::Glue));
2393 
2394   // If it cannot overflow, transform into an add.
2395   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2396     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2397                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2398 
2399   return SDValue();
2400 }
2401 
2402 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2403                            SelectionDAG &DAG, const TargetLowering &TLI) {
2404   EVT VT = V.getValueType();
2405 
2406   SDValue Cst;
2407   switch (TLI.getBooleanContents(VT)) {
2408   case TargetLowering::ZeroOrOneBooleanContent:
2409   case TargetLowering::UndefinedBooleanContent:
2410     Cst = DAG.getConstant(1, DL, VT);
2411     break;
2412   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2413     Cst = DAG.getConstant(-1, DL, VT);
2414     break;
2415   }
2416 
2417   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2418 }
2419 
2420 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2421   if (V.getOpcode() != ISD::XOR)
2422     return SDValue();
2423 
2424   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2425   if (!Const)
2426     return SDValue();
2427 
2428   EVT VT = V.getValueType();
2429 
2430   bool IsFlip = false;
2431   switch(TLI.getBooleanContents(VT)) {
2432     case TargetLowering::ZeroOrOneBooleanContent:
2433       IsFlip = Const->isOne();
2434       break;
2435     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2436       IsFlip = Const->isAllOnesValue();
2437       break;
2438     case TargetLowering::UndefinedBooleanContent:
2439       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2440       break;
2441   }
2442 
2443   if (IsFlip)
2444     return V.getOperand(0);
2445   return SDValue();
2446 }
2447 
2448 SDValue DAGCombiner::visitADDO(SDNode *N) {
2449   SDValue N0 = N->getOperand(0);
2450   SDValue N1 = N->getOperand(1);
2451   EVT VT = N0.getValueType();
2452   bool IsSigned = (ISD::SADDO == N->getOpcode());
2453 
2454   EVT CarryVT = N->getValueType(1);
2455   SDLoc DL(N);
2456 
2457   // If the flag result is dead, turn this into an ADD.
2458   if (!N->hasAnyUseOfValue(1))
2459     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2460                      DAG.getUNDEF(CarryVT));
2461 
2462   // canonicalize constant to RHS.
2463   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2464       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2465     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2466 
2467   // fold (addo x, 0) -> x + no carry out
2468   if (isNullOrNullSplat(N1))
2469     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2470 
2471   if (!IsSigned) {
2472     // If it cannot overflow, transform into an add.
2473     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2474       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2475                        DAG.getConstant(0, DL, CarryVT));
2476 
2477     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2478     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2479       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2480                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2481       return CombineTo(N, Sub,
2482                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2483     }
2484 
2485     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2486       return Combined;
2487 
2488     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2489       return Combined;
2490   }
2491 
2492   return SDValue();
2493 }
2494 
2495 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2496   EVT VT = N0.getValueType();
2497   if (VT.isVector())
2498     return SDValue();
2499 
2500   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2501   // If Y + 1 cannot overflow.
2502   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2503     SDValue Y = N1.getOperand(0);
2504     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2505     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2506       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2507                          N1.getOperand(2));
2508   }
2509 
2510   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2511   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2512     if (SDValue Carry = getAsCarry(TLI, N1))
2513       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2514                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2515 
2516   return SDValue();
2517 }
2518 
2519 SDValue DAGCombiner::visitADDE(SDNode *N) {
2520   SDValue N0 = N->getOperand(0);
2521   SDValue N1 = N->getOperand(1);
2522   SDValue CarryIn = N->getOperand(2);
2523 
2524   // canonicalize constant to RHS
2525   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2526   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2527   if (N0C && !N1C)
2528     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2529                        N1, N0, CarryIn);
2530 
2531   // fold (adde x, y, false) -> (addc x, y)
2532   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2533     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2534 
2535   return SDValue();
2536 }
2537 
2538 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2539   SDValue N0 = N->getOperand(0);
2540   SDValue N1 = N->getOperand(1);
2541   SDValue CarryIn = N->getOperand(2);
2542   SDLoc DL(N);
2543 
2544   // canonicalize constant to RHS
2545   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2546   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2547   if (N0C && !N1C)
2548     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2549 
2550   // fold (addcarry x, y, false) -> (uaddo x, y)
2551   if (isNullConstant(CarryIn)) {
2552     if (!LegalOperations ||
2553         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2554       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2555   }
2556 
2557   EVT CarryVT = CarryIn.getValueType();
2558 
2559   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2560   if (isNullConstant(N0) && isNullConstant(N1)) {
2561     EVT VT = N0.getValueType();
2562     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2563     AddToWorklist(CarryExt.getNode());
2564     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2565                                     DAG.getConstant(1, DL, VT)),
2566                      DAG.getConstant(0, DL, CarryVT));
2567   }
2568 
2569   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2570   if (isBitwiseNot(N0) && isNullConstant(N1)) {
2571     if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2572       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2573                                 DAG.getConstant(0, DL, N0.getValueType()),
2574                                 N0.getOperand(0), B);
2575       return CombineTo(N, Sub,
2576                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2577     }
2578   }
2579 
2580   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2581     return Combined;
2582 
2583   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2584     return Combined;
2585 
2586   return SDValue();
2587 }
2588 
2589 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2590                                        SDNode *N) {
2591   // Iff the flag result is dead:
2592   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2593   if ((N0.getOpcode() == ISD::ADD ||
2594        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2595       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2596     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2597                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2598 
2599   /**
2600    * When one of the addcarry argument is itself a carry, we may be facing
2601    * a diamond carry propagation. In which case we try to transform the DAG
2602    * to ensure linear carry propagation if that is possible.
2603    *
2604    * We are trying to get:
2605    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2606    */
2607   if (auto Y = getAsCarry(TLI, N1)) {
2608     /**
2609      *            (uaddo A, B)
2610      *             /       \
2611      *          Carry      Sum
2612      *            |          \
2613      *            | (addcarry *, 0, Z)
2614      *            |       /
2615      *             \   Carry
2616      *              |   /
2617      * (addcarry X, *, *)
2618      */
2619     if (Y.getOpcode() == ISD::UADDO &&
2620         CarryIn.getResNo() == 1 &&
2621         CarryIn.getOpcode() == ISD::ADDCARRY &&
2622         isNullConstant(CarryIn.getOperand(1)) &&
2623         CarryIn.getOperand(0) == Y.getValue(0)) {
2624       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2625                               Y.getOperand(0), Y.getOperand(1),
2626                               CarryIn.getOperand(2));
2627       AddToWorklist(NewY.getNode());
2628       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2629                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2630                          NewY.getValue(1));
2631     }
2632   }
2633 
2634   return SDValue();
2635 }
2636 
2637 // Since it may not be valid to emit a fold to zero for vector initializers
2638 // check if we can before folding.
2639 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2640                              SelectionDAG &DAG, bool LegalOperations) {
2641   if (!VT.isVector())
2642     return DAG.getConstant(0, DL, VT);
2643   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2644     return DAG.getConstant(0, DL, VT);
2645   return SDValue();
2646 }
2647 
/// Try to simplify the sub node \p N (operand 0 minus operand 1). The folds
/// below are attempted in order and the first one that matches wins. Returns
/// the replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // Non-null only when N1 is a non-opaque constant (per the helper's name).
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  // This returns unconditionally, so N1C is null for everything below.
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // Folds for a negation: (sub 0, X).
  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  // Only when the inner sub has no other user.
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  // The negated factor may be either operand of the single-use multiply.
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // sub N0, (and X, 1) --> add N0, X when X is known to be 0/-1.
  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      // The xor operands may appear in either order.
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
        unsigned OpSizeInBits = VT.getScalarSizeInBits();
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (OpSizeInBits - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
      }
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      // NOTE(review): N1C is always null here, because a non-null N1C already
      // returned via the (sub x, c) -> (add x, -c) fold above, so this branch
      // looks unreachable as written.
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  return SDValue();
}
2868 
2869 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2870   SDValue N0 = N->getOperand(0);
2871   SDValue N1 = N->getOperand(1);
2872   EVT VT = N0.getValueType();
2873   SDLoc DL(N);
2874 
2875   // fold vector ops
2876   if (VT.isVector()) {
2877     // TODO SimplifyVBinOp
2878 
2879     // fold (sub_sat x, 0) -> x, vector edition
2880     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2881       return N0;
2882   }
2883 
2884   // fold (sub_sat x, undef) -> 0
2885   if (N0.isUndef() || N1.isUndef())
2886     return DAG.getConstant(0, DL, VT);
2887 
2888   // fold (sub_sat x, x) -> 0
2889   if (N0 == N1)
2890     return DAG.getConstant(0, DL, VT);
2891 
2892   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2893       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2894     // fold (sub_sat c1, c2) -> c3
2895     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2896                                       N1.getNode());
2897   }
2898 
2899   // fold (sub_sat x, 0) -> x
2900   if (isNullConstant(N1))
2901     return N0;
2902 
2903   return SDValue();
2904 }
2905 
2906 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2907   SDValue N0 = N->getOperand(0);
2908   SDValue N1 = N->getOperand(1);
2909   EVT VT = N0.getValueType();
2910   SDLoc DL(N);
2911 
2912   // If the flag result is dead, turn this into an SUB.
2913   if (!N->hasAnyUseOfValue(1))
2914     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2915                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2916 
2917   // fold (subc x, x) -> 0 + no borrow
2918   if (N0 == N1)
2919     return CombineTo(N, DAG.getConstant(0, DL, VT),
2920                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2921 
2922   // fold (subc x, 0) -> x + no borrow
2923   if (isNullConstant(N1))
2924     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2925 
2926   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2927   if (isAllOnesConstant(N0))
2928     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2929                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2930 
2931   return SDValue();
2932 }
2933 
2934 SDValue DAGCombiner::visitSUBO(SDNode *N) {
2935   SDValue N0 = N->getOperand(0);
2936   SDValue N1 = N->getOperand(1);
2937   EVT VT = N0.getValueType();
2938   bool IsSigned = (ISD::SSUBO == N->getOpcode());
2939 
2940   EVT CarryVT = N->getValueType(1);
2941   SDLoc DL(N);
2942 
2943   // If the flag result is dead, turn this into an SUB.
2944   if (!N->hasAnyUseOfValue(1))
2945     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2946                      DAG.getUNDEF(CarryVT));
2947 
2948   // fold (subo x, x) -> 0 + no borrow
2949   if (N0 == N1)
2950     return CombineTo(N, DAG.getConstant(0, DL, VT),
2951                      DAG.getConstant(0, DL, CarryVT));
2952 
2953   // fold (subo x, 0) -> x + no borrow
2954   if (isNullOrNullSplat(N1))
2955     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2956 
2957   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2958   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
2959     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2960                      DAG.getConstant(0, DL, CarryVT));
2961 
2962   return SDValue();
2963 }
2964 
2965 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2966   SDValue N0 = N->getOperand(0);
2967   SDValue N1 = N->getOperand(1);
2968   SDValue CarryIn = N->getOperand(2);
2969 
2970   // fold (sube x, y, false) -> (subc x, y)
2971   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2972     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2973 
2974   return SDValue();
2975 }
2976 
2977 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2978   SDValue N0 = N->getOperand(0);
2979   SDValue N1 = N->getOperand(1);
2980   SDValue CarryIn = N->getOperand(2);
2981 
2982   // fold (subcarry x, y, false) -> (usubo x, y)
2983   if (isNullConstant(CarryIn)) {
2984     if (!LegalOperations ||
2985         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
2986       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2987   }
2988 
2989   return SDValue();
2990 }
2991 
2992 SDValue DAGCombiner::visitMUL(SDNode *N) {
2993   SDValue N0 = N->getOperand(0);
2994   SDValue N1 = N->getOperand(1);
2995   EVT VT = N0.getValueType();
2996 
2997   // fold (mul x, undef) -> 0
2998   if (N0.isUndef() || N1.isUndef())
2999     return DAG.getConstant(0, SDLoc(N), VT);
3000 
3001   bool N0IsConst = false;
3002   bool N1IsConst = false;
3003   bool N1IsOpaqueConst = false;
3004   bool N0IsOpaqueConst = false;
3005   APInt ConstValue0, ConstValue1;
3006   // fold vector ops
3007   if (VT.isVector()) {
3008     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3009       return FoldedVOp;
3010 
3011     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3012     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3013     assert((!N0IsConst ||
3014             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3015            "Splat APInt should be element width");
3016     assert((!N1IsConst ||
3017             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3018            "Splat APInt should be element width");
3019   } else {
3020     N0IsConst = isa<ConstantSDNode>(N0);
3021     if (N0IsConst) {
3022       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3023       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3024     }
3025     N1IsConst = isa<ConstantSDNode>(N1);
3026     if (N1IsConst) {
3027       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3028       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3029     }
3030   }
3031 
3032   // fold (mul c1, c2) -> c1*c2
3033   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3034     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3035                                       N0.getNode(), N1.getNode());
3036 
3037   // canonicalize constant to RHS (vector doesn't have to splat)
3038   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3039      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3040     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3041   // fold (mul x, 0) -> 0
3042   if (N1IsConst && ConstValue1.isNullValue())
3043     return N1;
3044   // fold (mul x, 1) -> x
3045   if (N1IsConst && ConstValue1.isOneValue())
3046     return N0;
3047 
3048   if (SDValue NewSel = foldBinOpIntoSelect(N))
3049     return NewSel;
3050 
3051   // fold (mul x, -1) -> 0-x
3052   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3053     SDLoc DL(N);
3054     return DAG.getNode(ISD::SUB, DL, VT,
3055                        DAG.getConstant(0, DL, VT), N0);
3056   }
3057   // fold (mul x, (1 << c)) -> x << c
3058   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3059       DAG.isKnownToBeAPowerOfTwo(N1) &&
3060       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3061     SDLoc DL(N);
3062     SDValue LogBase2 = BuildLogBase2(N1, DL);
3063     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3064     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3065     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3066   }
3067   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3068   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3069     unsigned Log2Val = (-ConstValue1).logBase2();
3070     SDLoc DL(N);
3071     // FIXME: If the input is something that is easily negated (e.g. a
3072     // single-use add), we should put the negate there.
3073     return DAG.getNode(ISD::SUB, DL, VT,
3074                        DAG.getConstant(0, DL, VT),
3075                        DAG.getNode(ISD::SHL, DL, VT, N0,
3076                             DAG.getConstant(Log2Val, DL,
3077                                       getShiftAmountTy(N0.getValueType()))));
3078   }
3079 
3080   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3081   // mul x, (2^N + 1) --> add (shl x, N), x
3082   // mul x, (2^N - 1) --> sub (shl x, N), x
3083   // Examples: x * 33 --> (x << 5) + x
3084   //           x * 15 --> (x << 4) - x
3085   //           x * -33 --> -((x << 5) + x)
3086   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3087   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3088     // TODO: We could handle more general decomposition of any constant by
3089     //       having the target set a limit on number of ops and making a
3090     //       callback to determine that sequence (similar to sqrt expansion).
3091     unsigned MathOp = ISD::DELETED_NODE;
3092     APInt MulC = ConstValue1.abs();
3093     if ((MulC - 1).isPowerOf2())
3094       MathOp = ISD::ADD;
3095     else if ((MulC + 1).isPowerOf2())
3096       MathOp = ISD::SUB;
3097 
3098     if (MathOp != ISD::DELETED_NODE) {
3099       unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3100                                           : (MulC + 1).logBase2();
3101       assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3102              "Not expecting multiply-by-constant that could have simplified");
3103       SDLoc DL(N);
3104       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3105                                 DAG.getConstant(ShAmt, DL, VT));
3106       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3107       if (ConstValue1.isNegative())
3108         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3109       return R;
3110     }
3111   }
3112 
3113   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3114   if (N0.getOpcode() == ISD::SHL &&
3115       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3116       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3117     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3118     if (isConstantOrConstantVector(C3))
3119       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3120   }
3121 
3122   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3123   // use.
3124   {
3125     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3126 
3127     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3128     if (N0.getOpcode() == ISD::SHL &&
3129         isConstantOrConstantVector(N0.getOperand(1)) &&
3130         N0.getNode()->hasOneUse()) {
3131       Sh = N0; Y = N1;
3132     } else if (N1.getOpcode() == ISD::SHL &&
3133                isConstantOrConstantVector(N1.getOperand(1)) &&
3134                N1.getNode()->hasOneUse()) {
3135       Sh = N1; Y = N0;
3136     }
3137 
3138     if (Sh.getNode()) {
3139       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3140       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3141     }
3142   }
3143 
3144   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3145   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3146       N0.getOpcode() == ISD::ADD &&
3147       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3148       isMulAddWithConstProfitable(N, N0, N1))
3149       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3150                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3151                                      N0.getOperand(0), N1),
3152                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3153                                      N0.getOperand(1), N1));
3154 
3155   // reassociate mul
3156   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3157     return RMUL;
3158 
3159   return SDValue();
3160 }
3161 
3162 /// Return true if divmod libcall is available.
3163 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3164                                      const TargetLowering &TLI) {
3165   RTLIB::Libcall LC;
3166   EVT NodeType = Node->getValueType(0);
3167   if (!NodeType.isSimple())
3168     return false;
3169   switch (NodeType.getSimpleVT().SimpleTy) {
3170   default: return false; // No libcall for vector types.
3171   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3172   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3173   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3174   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3175   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3176   }
3177 
3178   return TLI.getLibcallName(LC) != nullptr;
3179 }
3180 
3181 /// Issue divrem if both quotient and remainder are needed.
3182 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3183   if (Node->use_empty())
3184     return SDValue(); // This is a dead node, leave it alone.
3185 
3186   unsigned Opcode = Node->getOpcode();
3187   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3188   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3189 
3190   // DivMod lib calls can still work on non-legal types if using lib-calls.
3191   EVT VT = Node->getValueType(0);
3192   if (VT.isVector() || !VT.isInteger())
3193     return SDValue();
3194 
3195   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3196     return SDValue();
3197 
3198   // If DIVREM is going to get expanded into a libcall,
3199   // but there is no libcall available, then don't combine.
3200   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3201       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3202     return SDValue();
3203 
3204   // If div is legal, it's better to do the normal expansion
3205   unsigned OtherOpcode = 0;
3206   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3207     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3208     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3209       return SDValue();
3210   } else {
3211     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3212     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3213       return SDValue();
3214   }
3215 
3216   SDValue Op0 = Node->getOperand(0);
3217   SDValue Op1 = Node->getOperand(1);
3218   SDValue combined;
3219   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3220          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3221     SDNode *User = *UI;
3222     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3223         User->use_empty())
3224       continue;
3225     // Convert the other matching node(s), too;
3226     // otherwise, the DIVREM may get target-legalized into something
3227     // target-specific that we won't be able to recognize.
3228     unsigned UserOpc = User->getOpcode();
3229     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3230         User->getOperand(0) == Op0 &&
3231         User->getOperand(1) == Op1) {
3232       if (!combined) {
3233         if (UserOpc == OtherOpcode) {
3234           SDVTList VTs = DAG.getVTList(VT, VT);
3235           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3236         } else if (UserOpc == DivRemOpc) {
3237           combined = SDValue(User, 0);
3238         } else {
3239           assert(UserOpc == Opcode);
3240           continue;
3241         }
3242       }
3243       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3244         CombineTo(User, combined);
3245       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3246         CombineTo(User, combined.getValue(1));
3247     }
3248   }
3249   return combined;
3250 }
3251 
3252 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3253   SDValue N0 = N->getOperand(0);
3254   SDValue N1 = N->getOperand(1);
3255   EVT VT = N->getValueType(0);
3256   SDLoc DL(N);
3257 
3258   unsigned Opc = N->getOpcode();
3259   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3260   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3261 
3262   // X / undef -> undef
3263   // X % undef -> undef
3264   // X / 0 -> undef
3265   // X % 0 -> undef
3266   // NOTE: This includes vectors where any divisor element is zero/undef.
3267   if (DAG.isUndef(Opc, {N0, N1}))
3268     return DAG.getUNDEF(VT);
3269 
3270   // undef / X -> 0
3271   // undef % X -> 0
3272   if (N0.isUndef())
3273     return DAG.getConstant(0, DL, VT);
3274 
3275   // 0 / X -> 0
3276   // 0 % X -> 0
3277   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3278   if (N0C && N0C->isNullValue())
3279     return N0;
3280 
3281   // X / X -> 1
3282   // X % X -> 0
3283   if (N0 == N1)
3284     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3285 
3286   // X / 1 -> X
3287   // X % 1 -> 0
3288   // If this is a boolean op (single-bit element type), we can't have
3289   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3290   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3291   // it's a 1.
3292   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3293     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3294 
3295   return SDValue();
3296 }
3297 
3298 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3299   SDValue N0 = N->getOperand(0);
3300   SDValue N1 = N->getOperand(1);
3301   EVT VT = N->getValueType(0);
3302   EVT CCVT = getSetCCResultType(VT);
3303 
3304   // fold vector ops
3305   if (VT.isVector())
3306     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3307       return FoldedVOp;
3308 
3309   SDLoc DL(N);
3310 
3311   // fold (sdiv c1, c2) -> c1/c2
3312   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3313   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3314   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3315     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3316   // fold (sdiv X, -1) -> 0-X
3317   if (N1C && N1C->isAllOnesValue())
3318     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3319   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3320   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3321     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3322                          DAG.getConstant(1, DL, VT),
3323                          DAG.getConstant(0, DL, VT));
3324 
3325   if (SDValue V = simplifyDivRem(N, DAG))
3326     return V;
3327 
3328   if (SDValue NewSel = foldBinOpIntoSelect(N))
3329     return NewSel;
3330 
3331   // If we know the sign bits of both operands are zero, strength reduce to a
3332   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3333   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3334     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3335 
3336   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3337     // If the corresponding remainder node exists, update its users with
3338     // (Dividend - (Quotient * Divisor).
3339     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3340                                               { N0, N1 })) {
3341       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3342       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3343       AddToWorklist(Mul.getNode());
3344       AddToWorklist(Sub.getNode());
3345       CombineTo(RemNode, Sub);
3346     }
3347     return V;
3348   }
3349 
3350   // sdiv, srem -> sdivrem
3351   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3352   // true.  Otherwise, we break the simplification logic in visitREM().
3353   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3354   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3355     if (SDValue DivRem = useDivRem(N))
3356         return DivRem;
3357 
3358   return SDValue();
3359 }
3360 
3361 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3362   SDLoc DL(N);
3363   EVT VT = N->getValueType(0);
3364   EVT CCVT = getSetCCResultType(VT);
3365   unsigned BitWidth = VT.getScalarSizeInBits();
3366 
3367   // Helper for determining whether a value is a power-2 constant scalar or a
3368   // vector of such elements.
3369   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3370     if (C->isNullValue() || C->isOpaque())
3371       return false;
3372     if (C->getAPIntValue().isPowerOf2())
3373       return true;
3374     if ((-C->getAPIntValue()).isPowerOf2())
3375       return true;
3376     return false;
3377   };
3378 
3379   // fold (sdiv X, pow2) -> simple ops after legalize
3380   // FIXME: We check for the exact bit here because the generic lowering gives
3381   // better results in that case. The target-specific lowering should learn how
3382   // to handle exact sdivs efficiently.
3383   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3384     // Target-specific implementation of sdiv x, pow2.
3385     if (SDValue Res = BuildSDIVPow2(N))
3386       return Res;
3387 
3388     // Create constants that are functions of the shift amount value.
3389     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3390     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3391     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3392     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3393     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3394     if (!isConstantOrConstantVector(Inexact))
3395       return SDValue();
3396 
3397     // Splat the sign bit into the register
3398     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3399                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3400     AddToWorklist(Sign.getNode());
3401 
3402     // Add (N0 < 0) ? abs2 - 1 : 0;
3403     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3404     AddToWorklist(Srl.getNode());
3405     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3406     AddToWorklist(Add.getNode());
3407     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3408     AddToWorklist(Sra.getNode());
3409 
3410     // Special case: (sdiv X, 1) -> X
3411     // Special Case: (sdiv X, -1) -> 0-X
3412     SDValue One = DAG.getConstant(1, DL, VT);
3413     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3414     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3415     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3416     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3417     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3418 
3419     // If dividing by a positive value, we're done. Otherwise, the result must
3420     // be negated.
3421     SDValue Zero = DAG.getConstant(0, DL, VT);
3422     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3423 
3424     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3425     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3426     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3427     return Res;
3428   }
3429 
3430   // If integer divide is expensive and we satisfy the requirements, emit an
3431   // alternate sequence.  Targets may check function attributes for size/speed
3432   // trade-offs.
3433   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3434   if (isConstantOrConstantVector(N1) &&
3435       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3436     if (SDValue Op = BuildSDIV(N))
3437       return Op;
3438 
3439   return SDValue();
3440 }
3441 
3442 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3443   SDValue N0 = N->getOperand(0);
3444   SDValue N1 = N->getOperand(1);
3445   EVT VT = N->getValueType(0);
3446   EVT CCVT = getSetCCResultType(VT);
3447 
3448   // fold vector ops
3449   if (VT.isVector())
3450     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3451       return FoldedVOp;
3452 
3453   SDLoc DL(N);
3454 
3455   // fold (udiv c1, c2) -> c1/c2
3456   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3457   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3458   if (N0C && N1C)
3459     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3460                                                     N0C, N1C))
3461       return Folded;
3462   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3463   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3464     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3465                          DAG.getConstant(1, DL, VT),
3466                          DAG.getConstant(0, DL, VT));
3467 
3468   if (SDValue V = simplifyDivRem(N, DAG))
3469     return V;
3470 
3471   if (SDValue NewSel = foldBinOpIntoSelect(N))
3472     return NewSel;
3473 
3474   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3475     // If the corresponding remainder node exists, update its users with
3476     // (Dividend - (Quotient * Divisor).
3477     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3478                                               { N0, N1 })) {
3479       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3480       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3481       AddToWorklist(Mul.getNode());
3482       AddToWorklist(Sub.getNode());
3483       CombineTo(RemNode, Sub);
3484     }
3485     return V;
3486   }
3487 
3488   // sdiv, srem -> sdivrem
3489   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3490   // true.  Otherwise, we break the simplification logic in visitREM().
3491   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3492   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3493     if (SDValue DivRem = useDivRem(N))
3494         return DivRem;
3495 
3496   return SDValue();
3497 }
3498 
3499 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3500   SDLoc DL(N);
3501   EVT VT = N->getValueType(0);
3502 
3503   // fold (udiv x, (1 << c)) -> x >>u c
3504   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3505       DAG.isKnownToBeAPowerOfTwo(N1)) {
3506     SDValue LogBase2 = BuildLogBase2(N1, DL);
3507     AddToWorklist(LogBase2.getNode());
3508 
3509     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3510     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3511     AddToWorklist(Trunc.getNode());
3512     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3513   }
3514 
3515   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3516   if (N1.getOpcode() == ISD::SHL) {
3517     SDValue N10 = N1.getOperand(0);
3518     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3519         DAG.isKnownToBeAPowerOfTwo(N10)) {
3520       SDValue LogBase2 = BuildLogBase2(N10, DL);
3521       AddToWorklist(LogBase2.getNode());
3522 
3523       EVT ADDVT = N1.getOperand(1).getValueType();
3524       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3525       AddToWorklist(Trunc.getNode());
3526       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3527       AddToWorklist(Add.getNode());
3528       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3529     }
3530   }
3531 
3532   // fold (udiv x, c) -> alternate
3533   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3534   if (isConstantOrConstantVector(N1) &&
3535       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3536     if (SDValue Op = BuildUDIV(N))
3537       return Op;
3538 
3539   return SDValue();
3540 }
3541 
3542 // handles ISD::SREM and ISD::UREM
3543 SDValue DAGCombiner::visitREM(SDNode *N) {
3544   unsigned Opcode = N->getOpcode();
3545   SDValue N0 = N->getOperand(0);
3546   SDValue N1 = N->getOperand(1);
3547   EVT VT = N->getValueType(0);
3548   EVT CCVT = getSetCCResultType(VT);
3549 
3550   bool isSigned = (Opcode == ISD::SREM);
3551   SDLoc DL(N);
3552 
3553   // fold (rem c1, c2) -> c1%c2
3554   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3555   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3556   if (N0C && N1C)
3557     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3558       return Folded;
3559   // fold (urem X, -1) -> select(X == -1, 0, x)
3560   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3561     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3562                          DAG.getConstant(0, DL, VT), N0);
3563 
3564   if (SDValue V = simplifyDivRem(N, DAG))
3565     return V;
3566 
3567   if (SDValue NewSel = foldBinOpIntoSelect(N))
3568     return NewSel;
3569 
3570   if (isSigned) {
3571     // If we know the sign bits of both operands are zero, strength reduce to a
3572     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3573     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3574       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3575   } else {
3576     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3577     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3578       // fold (urem x, pow2) -> (and x, pow2-1)
3579       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3580       AddToWorklist(Add.getNode());
3581       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3582     }
3583     if (N1.getOpcode() == ISD::SHL &&
3584         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3585       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3586       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3587       AddToWorklist(Add.getNode());
3588       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3589     }
3590   }
3591 
3592   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3593 
3594   // If X/C can be simplified by the division-by-constant logic, lower
3595   // X%C to the equivalent of X-X/C*C.
3596   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3597   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3598   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3599   // combine will not return a DIVREM.  Regardless, checking cheapness here
3600   // makes sense since the simplification results in fatter code.
3601   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3602     SDValue OptimizedDiv =
3603         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3604     if (OptimizedDiv.getNode()) {
3605       // If the equivalent Div node also exists, update its users.
3606       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3607       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3608                                                 { N0, N1 }))
3609         CombineTo(DivNode, OptimizedDiv);
3610       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3611       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3612       AddToWorklist(OptimizedDiv.getNode());
3613       AddToWorklist(Mul.getNode());
3614       return Sub;
3615     }
3616   }
3617 
3618   // sdiv, srem -> sdivrem
3619   if (SDValue DivRem = useDivRem(N))
3620     return DivRem.getValue(1);
3621 
3622   return SDValue();
3623 }
3624 
3625 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3626   SDValue N0 = N->getOperand(0);
3627   SDValue N1 = N->getOperand(1);
3628   EVT VT = N->getValueType(0);
3629   SDLoc DL(N);
3630 
3631   if (VT.isVector()) {
3632     // fold (mulhs x, 0) -> 0
3633     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3634       return N1;
3635     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3636       return N0;
3637   }
3638 
3639   // fold (mulhs x, 0) -> 0
3640   if (isNullConstant(N1))
3641     return N1;
3642   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3643   if (isOneConstant(N1))
3644     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3645                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3646                                        getShiftAmountTy(N0.getValueType())));
3647 
3648   // fold (mulhs x, undef) -> 0
3649   if (N0.isUndef() || N1.isUndef())
3650     return DAG.getConstant(0, DL, VT);
3651 
3652   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3653   // plus a shift.
3654   if (VT.isSimple() && !VT.isVector()) {
3655     MVT Simple = VT.getSimpleVT();
3656     unsigned SimpleSize = Simple.getSizeInBits();
3657     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3658     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3659       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3660       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3661       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3662       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3663             DAG.getConstant(SimpleSize, DL,
3664                             getShiftAmountTy(N1.getValueType())));
3665       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3666     }
3667   }
3668 
3669   return SDValue();
3670 }
3671 
3672 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3673   SDValue N0 = N->getOperand(0);
3674   SDValue N1 = N->getOperand(1);
3675   EVT VT = N->getValueType(0);
3676   SDLoc DL(N);
3677 
3678   if (VT.isVector()) {
3679     // fold (mulhu x, 0) -> 0
3680     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3681       return N1;
3682     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3683       return N0;
3684   }
3685 
3686   // fold (mulhu x, 0) -> 0
3687   if (isNullConstant(N1))
3688     return N1;
3689   // fold (mulhu x, 1) -> 0
3690   if (isOneConstant(N1))
3691     return DAG.getConstant(0, DL, N0.getValueType());
3692   // fold (mulhu x, undef) -> 0
3693   if (N0.isUndef() || N1.isUndef())
3694     return DAG.getConstant(0, DL, VT);
3695 
3696   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3697   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3698       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3699     SDLoc DL(N);
3700     unsigned NumEltBits = VT.getScalarSizeInBits();
3701     SDValue LogBase2 = BuildLogBase2(N1, DL);
3702     SDValue SRLAmt = DAG.getNode(
3703         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3704     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3705     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3706     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3707   }
3708 
3709   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3710   // plus a shift.
3711   if (VT.isSimple() && !VT.isVector()) {
3712     MVT Simple = VT.getSimpleVT();
3713     unsigned SimpleSize = Simple.getSizeInBits();
3714     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3715     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3716       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3717       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3718       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3719       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3720             DAG.getConstant(SimpleSize, DL,
3721                             getShiftAmountTy(N1.getValueType())));
3722       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3723     }
3724   }
3725 
3726   return SDValue();
3727 }
3728 
3729 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3730 /// give the opcodes for the two computations that are being performed. Return
3731 /// true if a simplification was made.
3732 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3733                                                 unsigned HiOp) {
3734   // If the high half is not needed, just compute the low half.
3735   bool HiExists = N->hasAnyUseOfValue(1);
3736   if (!HiExists && (!LegalOperations ||
3737                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3738     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3739     return CombineTo(N, Res, Res);
3740   }
3741 
3742   // If the low half is not needed, just compute the high half.
3743   bool LoExists = N->hasAnyUseOfValue(0);
3744   if (!LoExists && (!LegalOperations ||
3745                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3746     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3747     return CombineTo(N, Res, Res);
3748   }
3749 
3750   // If both halves are used, return as it is.
3751   if (LoExists && HiExists)
3752     return SDValue();
3753 
3754   // If the two computed results can be simplified separately, separate them.
3755   if (LoExists) {
3756     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3757     AddToWorklist(Lo.getNode());
3758     SDValue LoOpt = combine(Lo.getNode());
3759     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3760         (!LegalOperations ||
3761          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3762       return CombineTo(N, LoOpt, LoOpt);
3763   }
3764 
3765   if (HiExists) {
3766     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3767     AddToWorklist(Hi.getNode());
3768     SDValue HiOpt = combine(Hi.getNode());
3769     if (HiOpt.getNode() && HiOpt != Hi &&
3770         (!LegalOperations ||
3771          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3772       return CombineTo(N, HiOpt, HiOpt);
3773   }
3774 
3775   return SDValue();
3776 }
3777 
3778 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3779   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3780     return Res;
3781 
3782   EVT VT = N->getValueType(0);
3783   SDLoc DL(N);
3784 
3785   // If the type is twice as wide is legal, transform the mulhu to a wider
3786   // multiply plus a shift.
3787   if (VT.isSimple() && !VT.isVector()) {
3788     MVT Simple = VT.getSimpleVT();
3789     unsigned SimpleSize = Simple.getSizeInBits();
3790     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3791     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3792       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3793       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3794       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3795       // Compute the high part as N1.
3796       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3797             DAG.getConstant(SimpleSize, DL,
3798                             getShiftAmountTy(Lo.getValueType())));
3799       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3800       // Compute the low part as N0.
3801       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3802       return CombineTo(N, Lo, Hi);
3803     }
3804   }
3805 
3806   return SDValue();
3807 }
3808 
3809 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3810   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3811     return Res;
3812 
3813   EVT VT = N->getValueType(0);
3814   SDLoc DL(N);
3815 
3816   // If the type is twice as wide is legal, transform the mulhu to a wider
3817   // multiply plus a shift.
3818   if (VT.isSimple() && !VT.isVector()) {
3819     MVT Simple = VT.getSimpleVT();
3820     unsigned SimpleSize = Simple.getSizeInBits();
3821     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3822     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3823       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3824       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3825       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3826       // Compute the high part as N1.
3827       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3828             DAG.getConstant(SimpleSize, DL,
3829                             getShiftAmountTy(Lo.getValueType())));
3830       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3831       // Compute the low part as N0.
3832       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3833       return CombineTo(N, Lo, Hi);
3834     }
3835   }
3836 
3837   return SDValue();
3838 }
3839 
3840 SDValue DAGCombiner::visitMULO(SDNode *N) {
3841   bool IsSigned = (ISD::SMULO == N->getOpcode());
3842 
3843   // (mulo x, 2) -> (addo x, x)
3844   if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3845     if (C2->getAPIntValue() == 2)
3846       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3847                          N->getVTList(), N->getOperand(0), N->getOperand(0));
3848 
3849   return SDValue();
3850 }
3851 
3852 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3853   SDValue N0 = N->getOperand(0);
3854   SDValue N1 = N->getOperand(1);
3855   EVT VT = N0.getValueType();
3856 
3857   // fold vector ops
3858   if (VT.isVector())
3859     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3860       return FoldedVOp;
3861 
3862   // fold operation with constant operands.
3863   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3864   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3865   if (N0C && N1C)
3866     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3867 
3868   // canonicalize constant to RHS
3869   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3870      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3871     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3872 
3873   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3874   // Only do this if the current op isn't legal and the flipped is.
3875   unsigned Opcode = N->getOpcode();
3876   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3877   if (!TLI.isOperationLegal(Opcode, VT) &&
3878       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3879       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3880     unsigned AltOpcode;
3881     switch (Opcode) {
3882     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3883     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3884     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3885     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3886     default: llvm_unreachable("Unknown MINMAX opcode");
3887     }
3888     if (TLI.isOperationLegal(AltOpcode, VT))
3889       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3890   }
3891 
3892   return SDValue();
3893 }
3894 
/// If this is a bitwise logic instruction and both operands have the same
/// opcode, try to sink the other opcode after the logic instruction.
///
/// In the comments below "logic_op" is N's opcode and "hand_op" is the opcode
/// shared by N's two operands. The handled hand_ops are the integer extends,
/// TRUNCATE, SHL/SRL/SRA/AND with a common second operand, BSWAP,
/// BITCAST/SCALAR_TO_VECTOR, and VECTOR_SHUFFLE with a common mask.
///
/// \param N An AND, OR or XOR node whose two operands have the same opcode
///          (both conditions are asserted).
/// \returns The rewritten value when one of the patterns matches and its
///          use-count / legality checks pass; an empty SDValue otherwise.
SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned LogicOpcode = N->getOpcode();
  unsigned HandOpcode = N0.getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) && "Expected logic opcode");
  assert(HandOpcode == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  // (Everything below reads operand 0 of both hands.)
  if (N0.getNumOperands() == 0)
    return SDValue();

  // FIXME: We should check number of uses of the operands to not increase
  //        the instruction count for all transforms.

  // Handle size-changing casts.
  // X and Y are the first operands of the two hands; XVT is the type the
  // hoisted logic op is created in for every pattern except the shuffles.
  SDValue X = N0.getOperand(0);
  SDValue Y = N1.getOperand(0);
  EVT XVT = X.getValueType();
  SDLoc DL(N);
  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
      HandOpcode == ISD::SIGN_EXTEND) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching integer source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization. Don't ever
    // create an unsupported vector op.
    if ((VT.isVector() || LegalOperations) &&
        !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
      return SDValue();
    // Avoid infinite looping with PromoteIntBinOp.
    // TODO: Should we apply desirable/legal constraints to all opcodes?
    if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
        !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
      return SDValue();
    // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
  if (HandOpcode == ISD::TRUNCATE) {
    // If both operands have other uses, this transform would create extra
    // instructions without eliminating anything.
    if (!N0.hasOneUse() && !N1.hasOneUse())
      return SDValue();
    // We need matching source types.
    if (XVT != Y.getValueType())
      return SDValue();
    // Don't create an illegal op during or after legalization.
    if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
      return SDValue();
    // Be extra careful sinking truncate. If it's free, there's no benefit in
    // widening a binop. Also, don't create a logic op on an illegal type.
    if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
      return SDValue();
    if (!TLI.isTypeLegal(XVT))
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // For binops SHL/SRL/SRA/AND:
  //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
  // Only applies when both hands use the very same second operand z.
  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
       HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
  }

  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
  if (HandOpcode == ISD::BSWAP) {
    // If either operand has other uses, this transform is not an improvement.
    if (!N0.hasOneUse() || !N1.hasOneUse())
      return SDValue();
    SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
    return DAG.getNode(HandOpcode, DL, VT, Logic);
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
    // Input types must be integer and the same.
    if (XVT.isInteger() && XVT == Y.getValueType()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
      return DAG.getNode(HandOpcode, DL, VT, Logic);
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
    auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
    assert(X.getValueType() == Y.getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
        !SVN0->getMask().equals(SVN1->getMask()))
      return SDValue();

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    // For XOR with a non-undef shared operand, ShOp becomes whatever
    // tryFoldToZero returns; a null result disables the fold below through
    // the ShOp.getNode() check.
    SDValue ShOp = N0.getOperand(1);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
                                  N0.getOperand(0), N1.getOperand(0));
      return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    // Same treatment as above, now for a shared *first* shuffle operand.
    ShOp = N0.getOperand(0);
    if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
      ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

    // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
    if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
      SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
                                  N1.getOperand(1));
      return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
    }
  }

  return SDValue();
}
4056 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
///
/// \param IsAnd True when the logic op combining the two compares is an AND,
///              false when it is an OR.
/// \param N0    First operand of the logic op; must be setcc-equivalent.
/// \param N1    Second operand of the logic op; must be setcc-equivalent.
/// \param DL    Debug location used for the setcc nodes created here.
/// \returns The replacement value if one of the folds applies, otherwise an
///          empty SDValue.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // LL/LR and RL/RR are the compared operands of N0 and N1 respectively;
  // N0CC/N1CC hold their condition-code nodes.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same RHS with the same predicate: when that
  // RHS is 0 or -1, the two compares can be merged into a single compare of
  // (or LL, RL) or (and LL, RL).
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullOrNullSplat(LR);
    bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
    // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
    // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The 0 and -1 constants may appear in either order; 1-bit element types
  // are excluded by the scalar-size check.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }

    // Turn compare of constants whose difference is 1 bit into add+and+setcc.
    if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
      // Match a shared variable operand and 2 non-opaque constant operands.
      ConstantSDNode *C0 = isConstOrConstSplat(LR);
      ConstantSDNode *C1 = isConstOrConstSplat(RR);
      if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
        // Canonicalize larger constant as C0.
        // (The comparison is unsigned.)
        if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
          std::swap(C0, C1);

        // The difference of the constants must be a single bit.
        const APInt &C0Val = C0->getAPIntValue();
        const APInt &C1Val = C1->getAPIntValue();
        if ((C0Val - C1Val).isPowerOf2()) {
          // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc (and (add X, -C1), ~(C0 - C1)), 0, ne/eq
          SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
          SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
          SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
          SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
          SDValue Zero = DAG.getConstant(0, DL, OpVT);
          return DAG.getSetCC(DL, VT, And, Zero, CC0);
        }
      }
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  // The second compare has its operands swapped relative to the first, so
  // swap them back and adjust its predicate to match.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    // Once operations must be legal, both the merged condition code and the
    // SETCC on the operand type have to be legal.
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
4202 
4203 /// This contains all DAGCombine rules which reduce two values combined by
4204 /// an And operation to a single value. This makes them reusable in the context
4205 /// of visitSELECT(). Rules involving constants are not included as
4206 /// visitSELECT() already handles those cases.
4207 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4208   EVT VT = N1.getValueType();
4209   SDLoc DL(N);
4210 
4211   // fold (and x, undef) -> 0
4212   if (N0.isUndef() || N1.isUndef())
4213     return DAG.getConstant(0, DL, VT);
4214 
4215   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4216     return V;
4217 
4218   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4219       VT.getSizeInBits() <= 64) {
4220     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4221       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4222         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4223         // immediate for an add, but it is legal if its top c2 bits are set,
4224         // transform the ADD so the immediate doesn't need to be materialized
4225         // in a register.
4226         APInt ADDC = ADDI->getAPIntValue();
4227         APInt SRLC = SRLI->getAPIntValue();
4228         if (ADDC.getMinSignedBits() <= 64 &&
4229             SRLC.ult(VT.getSizeInBits()) &&
4230             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4231           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4232                                              SRLC.getZExtValue());
4233           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4234             ADDC |= Mask;
4235             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4236               SDLoc DL0(N0);
4237               SDValue NewAdd =
4238                 DAG.getNode(ISD::ADD, DL0, VT,
4239                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4240               CombineTo(N0.getNode(), NewAdd);
4241               // Return N so it doesn't get rechecked!
4242               return SDValue(N, 0);
4243             }
4244           }
4245         }
4246       }
4247     }
4248   }
4249 
4250   // Reduce bit extract of low half of an integer to the narrower type.
4251   // (and (srl i64:x, K), KMask) ->
4252   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4253   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4254     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4255       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4256         unsigned Size = VT.getSizeInBits();
4257         const APInt &AndMask = CAnd->getAPIntValue();
4258         unsigned ShiftBits = CShift->getZExtValue();
4259 
4260         // Bail out, this node will probably disappear anyway.
4261         if (ShiftBits == 0)
4262           return SDValue();
4263 
4264         unsigned MaskBits = AndMask.countTrailingOnes();
4265         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4266 
4267         if (AndMask.isMask() &&
4268             // Required bits must not span the two halves of the integer and
4269             // must fit in the half size type.
4270             (ShiftBits + MaskBits <= Size / 2) &&
4271             TLI.isNarrowingProfitable(VT, HalfVT) &&
4272             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4273             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4274             TLI.isTruncateFree(VT, HalfVT) &&
4275             TLI.isZExtFree(HalfVT, VT)) {
4276           // The isNarrowingProfitable is to avoid regressions on PPC and
4277           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4278           // on downstream users of this. Those patterns could probably be
4279           // extended to handle extensions mixed in.
4280 
4281           SDValue SL(N0);
4282           assert(MaskBits <= Size);
4283 
4284           // Extracting the highest bit of the low half.
4285           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4286           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4287                                       N0.getOperand(0));
4288 
4289           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4290           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4291           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4292           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4293           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4294         }
4295       }
4296     }
4297   }
4298 
4299   return SDValue();
4300 }
4301 
4302 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4303                                    EVT LoadResultTy, EVT &ExtVT) {
4304   if (!AndC->getAPIntValue().isMask())
4305     return false;
4306 
4307   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4308 
4309   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4310   EVT LoadedVT = LoadN->getMemoryVT();
4311 
4312   if (ExtVT == LoadedVT &&
4313       (!LegalOperations ||
4314        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4315     // ZEXTLOAD will match without needing to change the size of the value being
4316     // loaded.
4317     return true;
4318   }
4319 
4320   // Do not change the width of a volatile load.
4321   if (LoadN->isVolatile())
4322     return false;
4323 
4324   // Do not generate loads of non-round integer types since these can
4325   // be expensive (and would be wrong if the type is not byte sized).
4326   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4327     return false;
4328 
4329   if (LegalOperations &&
4330       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4331     return false;
4332 
4333   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4334     return false;
4335 
4336   return true;
4337 }
4338 
4339 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4340                                     ISD::LoadExtType ExtType, EVT &MemVT,
4341                                     unsigned ShAmt) {
4342   if (!LDST)
4343     return false;
4344   // Only allow byte offsets.
4345   if (ShAmt % 8)
4346     return false;
4347 
4348   // Do not generate loads of non-round integer types since these can
4349   // be expensive (and would be wrong if the type is not byte sized).
4350   if (!MemVT.isRound())
4351     return false;
4352 
4353   // Don't change the width of a volatile load.
4354   if (LDST->isVolatile())
4355     return false;
4356 
4357   // Verify that we are actually reducing a load width here.
4358   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4359     return false;
4360 
4361   // Ensure that this isn't going to produce an unsupported unaligned access.
4362   if (ShAmt &&
4363       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4364                               LDST->getAddressSpace(), ShAmt / 8))
4365     return false;
4366 
4367   // It's not possible to generate a constant of extended or untyped type.
4368   EVT PtrType = LDST->getBasePtr().getValueType();
4369   if (PtrType == MVT::Untyped || PtrType.isExtended())
4370     return false;
4371 
4372   if (isa<LoadSDNode>(LDST)) {
4373     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4374     // Don't transform one with multiple uses, this would require adding a new
4375     // load.
4376     if (!SDValue(Load, 0).hasOneUse())
4377       return false;
4378 
4379     if (LegalOperations &&
4380         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4381       return false;
4382 
4383     // For the transform to be legal, the load must produce only two values
4384     // (the value loaded and the chain).  Don't transform a pre-increment
4385     // load, for example, which produces an extra value.  Otherwise the
4386     // transformation is not equivalent, and the downstream logic to replace
4387     // uses gets things wrong.
4388     if (Load->getNumValues() > 2)
4389       return false;
4390 
4391     // If the load that we're shrinking is an extload and we're not just
4392     // discarding the extension we can't simply shrink the load. Bail.
4393     // TODO: It would be possible to merge the extensions in some cases.
4394     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4395         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4396       return false;
4397 
4398     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4399       return false;
4400   } else {
4401     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4402     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4403     // Can't write outside the original store
4404     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4405       return false;
4406 
4407     if (LegalOperations &&
4408         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4409       return false;
4410   }
4411   return true;
4412 }
4413 
/// Walk the operands of \p N (recursing through OR/XOR/AND operands) to find
/// loads that can be narrowed according to \p Mask.
///
/// \param N               Node whose operands are inspected.
/// \param Loads           Receives every load that passes the narrowing
///                        checks and whose memory type is at least as wide as
///                        the mask type.
/// \param NodesWithConsts Receives OR/XOR nodes that have a constant operand
///                        with bits set outside of \p Mask.
/// \param Mask            The constant AND mask being propagated.
/// \param NodeToMask      In/out: the single operand (if any) that is neither
///                        a narrowable load, an acceptable extend, nor a
///                        logic op. At most one such node is tolerated; it is
///                        reset to nullptr when the candidate produces more
///                        than one data result.
/// \returns false as soon as an operand makes the transformation impossible,
///          true if every operand was acceptable.
bool DAGCombiner::SearchForAndLoads(SDNode *N,
                                    SmallVectorImpl<LoadSDNode*> &Loads,
                                    SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                                    ConstantSDNode *Mask,
                                    SDNode *&NodeToMask) {
  // Recursively search for the operands, looking for loads which can be
  // narrowed.
  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
    SDValue Op = N->getOperand(i);

    // Vector operands are not handled.
    if (Op.getValueType().isVector())
      return false;

    // Some constants may need fixing up later if they are too large.
    // Constants never block the search; OR/XOR users are only recorded when
    // the constant has bits outside the mask.
    if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
          (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
        NodesWithConsts.insert(N);
      continue;
    }

    // Any non-constant operand must be used only by N.
    if (!Op.hasOneUse())
      return false;

    // In the switch below, 'continue' accepts the operand and moves on to the
    // next one, while 'break' (or an unlisted opcode) falls through to the
    // NodeToMask handling after the switch.
    switch(Op.getOpcode()) {
    case ISD::LOAD: {
      auto *Load = cast<LoadSDNode>(Op);
      EVT ExtVT;
      if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
          isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {

        // ZEXTLOAD is already small enough.
        if (Load->getExtensionType() == ISD::ZEXTLOAD &&
            ExtVT.bitsGE(Load->getMemoryVT()))
          continue;

        // Use LE to convert equal sized loads to zext.
        if (ExtVT.bitsLE(Load->getMemoryVT()))
          Loads.push_back(Load);

        continue;
      }
      // A load that cannot be narrowed makes the whole search fail.
      return false;
    }
    case ISD::ZERO_EXTEND:
    case ISD::AssertZext: {
      unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
      EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
      // For AssertZext the type comes from its VT operand; for ZERO_EXTEND it
      // is the type of the value being extended.
      EVT VT = Op.getOpcode() == ISD::AssertZext ?
        cast<VTSDNode>(Op.getOperand(1))->getVT() :
        Op.getOperand(0).getValueType();

      // We can accept extending nodes if the mask is wider or an equal
      // width to the original type.
      if (ExtVT.bitsGE(VT))
        continue;
      break;
    }
    case ISD::OR:
    case ISD::XOR:
    case ISD::AND:
      if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
                             NodeToMask))
        return false;
      continue;
    }

    // Allow one node which will be masked along with any loads found.
    if (NodeToMask)
      return false;

    // Also ensure that the node to be masked only produces one data result.
    // Glue and chain (MVT::Other) results are not counted as data results.
    NodeToMask = Op.getNode();
    if (NodeToMask->getNumValues() > 1) {
      bool HasValue = false;
      for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
        MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
        if (VT != MVT::Glue && VT != MVT::Other) {
          if (HasValue) {
            NodeToMask = nullptr;
            return false;
          }
          HasValue = true;
        }
      }
      assert(HasValue && "Node to be masked has no data result?");
    }
  }
  return true;
}
4504 
4505 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4506   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4507   if (!Mask)
4508     return false;
4509 
4510   if (!Mask->getAPIntValue().isMask())
4511     return false;
4512 
4513   // No need to do anything if the and directly uses a load.
4514   if (isa<LoadSDNode>(N->getOperand(0)))
4515     return false;
4516 
4517   SmallVector<LoadSDNode*, 8> Loads;
4518   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4519   SDNode *FixupNode = nullptr;
4520   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4521     if (Loads.size() == 0)
4522       return false;
4523 
4524     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4525     SDValue MaskOp = N->getOperand(1);
4526 
4527     // If it exists, fixup the single node we allow in the tree that needs
4528     // masking.
4529     if (FixupNode) {
4530       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4531       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4532                                 FixupNode->getValueType(0),
4533                                 SDValue(FixupNode, 0), MaskOp);
4534       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4535       if (And.getOpcode() == ISD ::AND)
4536         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4537     }
4538 
4539     // Narrow any constants that need it.
4540     for (auto *LogicN : NodesWithConsts) {
4541       SDValue Op0 = LogicN->getOperand(0);
4542       SDValue Op1 = LogicN->getOperand(1);
4543 
4544       if (isa<ConstantSDNode>(Op0))
4545           std::swap(Op0, Op1);
4546 
4547       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4548                                 Op1, MaskOp);
4549 
4550       DAG.UpdateNodeOperands(LogicN, Op0, And);
4551     }
4552 
4553     // Create narrow loads.
4554     for (auto *Load : Loads) {
4555       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4556       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4557                                 SDValue(Load, 0), MaskOp);
4558       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4559       if (And.getOpcode() == ISD ::AND)
4560         And = SDValue(
4561             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4562       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4563       assert(NewLoad &&
4564              "Shouldn't be masking the load if it can't be narrowed");
4565       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4566     }
4567     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4568     return true;
4569   }
4570   return false;
4571 }
4572 
4573 // Unfold
4574 //    x &  (-1 'logical shift' y)
4575 // To
4576 //    (x 'opposite logical shift' y) 'logical shift' y
4577 // if it is better for performance.
4578 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4579   assert(N->getOpcode() == ISD::AND);
4580 
4581   SDValue N0 = N->getOperand(0);
4582   SDValue N1 = N->getOperand(1);
4583 
4584   // Do we actually prefer shifts over mask?
4585   if (!TLI.preferShiftsToClearExtremeBits(N0))
4586     return SDValue();
4587 
4588   // Try to match  (-1 '[outer] logical shift' y)
4589   unsigned OuterShift;
4590   unsigned InnerShift; // The opposite direction to the OuterShift.
4591   SDValue Y;           // Shift amount.
4592   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4593     if (!M.hasOneUse())
4594       return false;
4595     OuterShift = M->getOpcode();
4596     if (OuterShift == ISD::SHL)
4597       InnerShift = ISD::SRL;
4598     else if (OuterShift == ISD::SRL)
4599       InnerShift = ISD::SHL;
4600     else
4601       return false;
4602     if (!isAllOnesConstant(M->getOperand(0)))
4603       return false;
4604     Y = M->getOperand(1);
4605     return true;
4606   };
4607 
4608   SDValue X;
4609   if (matchMask(N1))
4610     X = N0;
4611   else if (matchMask(N0))
4612     X = N1;
4613   else
4614     return SDValue();
4615 
4616   SDLoc DL(N);
4617   EVT VT = N->getValueType(0);
4618 
4619   //     tmp = x   'opposite logical shift' y
4620   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4621   //     ret = tmp 'logical shift' y
4622   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4623 
4624   return T1;
4625 }
4626 
/// Try the AND-specific folds on node \p N, in the order written below.
///
/// The return value follows three conventions visible in this function:
///  - a value other than SDValue(N, 0) is the replacement computed by a fold;
///  - SDValue(N, 0) is returned after a fold has already rewired the DAG
///    itself (via CombineTo, ReplaceAllUsesOfValueWith, SimplifyDemandedBits
///    or BackwardsPropagateMask);
///  - an empty SDValue means no fold applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
                             SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
                             SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  // N1C is also set when N1 is a constant splat; it is reused by several of
  // the folds further down.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Mask holds the bits the AND clears, truncated to the width of V; the
    // fold applies only if V is already known to be zero in all of them.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // The 1-bit zero default stays in place when N1 is neither form, so the
    // all-ones test further down cannot succeed in that case.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        // Note: these two locals shadow the function-level VT and BitWidth.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: whether the load's extension kind allows the AND to be dropped.
    // Extension kinds not listed (the default case) are rejected.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      // LN0 is the load itself, looking through the any_extend if present.
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
      AddToWorklist(N);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    if (isNullOrNullSplat(N0.getOperand(0))) {
      SDValue SubRHS = N0.getOperand(1);
      if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return SubRHS;
      if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
          SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
    }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  // Same shape as the EXTLOAD fold above, with the extra one-use requirement.
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  // Last resort: turn x & (-1 shifted by y) into a pair of shifts if the
  // target prefers that form.
  if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
    return Shifts;

  return SDValue();
}
4928 
4929 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4930 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4931                                         bool DemandHighBits) {
4932   if (!LegalOperations)
4933     return SDValue();
4934 
4935   EVT VT = N->getValueType(0);
4936   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4937     return SDValue();
4938   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4939     return SDValue();
4940 
4941   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4942   bool LookPassAnd0 = false;
4943   bool LookPassAnd1 = false;
4944   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4945       std::swap(N0, N1);
4946   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4947       std::swap(N0, N1);
4948   if (N0.getOpcode() == ISD::AND) {
4949     if (!N0.getNode()->hasOneUse())
4950       return SDValue();
4951     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4952     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4953     // This is needed for X86.
4954     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4955                   N01C->getZExtValue() != 0xFFFF))
4956       return SDValue();
4957     N0 = N0.getOperand(0);
4958     LookPassAnd0 = true;
4959   }
4960 
4961   if (N1.getOpcode() == ISD::AND) {
4962     if (!N1.getNode()->hasOneUse())
4963       return SDValue();
4964     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4965     if (!N11C || N11C->getZExtValue() != 0xFF)
4966       return SDValue();
4967     N1 = N1.getOperand(0);
4968     LookPassAnd1 = true;
4969   }
4970 
4971   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4972     std::swap(N0, N1);
4973   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4974     return SDValue();
4975   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4976     return SDValue();
4977 
4978   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4979   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
4980   if (!N01C || !N11C)
4981     return SDValue();
4982   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4983     return SDValue();
4984 
4985   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4986   SDValue N00 = N0->getOperand(0);
4987   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4988     if (!N00.getNode()->hasOneUse())
4989       return SDValue();
4990     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
4991     if (!N001C || N001C->getZExtValue() != 0xFF)
4992       return SDValue();
4993     N00 = N00.getOperand(0);
4994     LookPassAnd0 = true;
4995   }
4996 
4997   SDValue N10 = N1->getOperand(0);
4998   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4999     if (!N10.getNode()->hasOneUse())
5000       return SDValue();
5001     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5002     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5003     // for X86.
5004     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5005                    N101C->getZExtValue() != 0xFFFF))
5006       return SDValue();
5007     N10 = N10.getOperand(0);
5008     LookPassAnd1 = true;
5009   }
5010 
5011   if (N00 != N10)
5012     return SDValue();
5013 
5014   // Make sure everything beyond the low halfword gets set to zero since the SRL
5015   // 16 will clear the top bits.
5016   unsigned OpSizeInBits = VT.getSizeInBits();
5017   if (DemandHighBits && OpSizeInBits > 16) {
5018     // If the left-shift isn't masked out then the only way this is a bswap is
5019     // if all bits beyond the low 8 are 0. In that case the entire pattern
5020     // reduces to a left shift anyway: leave it for other parts of the combiner.
5021     if (!LookPassAnd0)
5022       return SDValue();
5023 
5024     // However, if the right shift isn't masked out then it might be because
5025     // it's not needed. See if we can spot that too.
5026     if (!LookPassAnd1 &&
5027         !DAG.MaskedValueIsZero(
5028             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5029       return SDValue();
5030   }
5031 
5032   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5033   if (OpSizeInBits > 16) {
5034     SDLoc DL(N);
5035     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5036                       DAG.getConstant(OpSizeInBits - 16, DL,
5037                                       getShiftAmountTy(VT)));
5038   }
5039   return Res;
5040 }
5041 
5042 /// Return true if the specified node is an element that makes up a 32-bit
5043 /// packed halfword byteswap.
5044 /// ((x & 0x000000ff) << 8) |
5045 /// ((x & 0x0000ff00) >> 8) |
5046 /// ((x & 0x00ff0000) << 8) |
5047 /// ((x & 0xff000000) >> 8)
5048 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5049   if (!N.getNode()->hasOneUse())
5050     return false;
5051 
5052   unsigned Opc = N.getOpcode();
5053   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5054     return false;
5055 
5056   SDValue N0 = N.getOperand(0);
5057   unsigned Opc0 = N0.getOpcode();
5058   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5059     return false;
5060 
5061   ConstantSDNode *N1C = nullptr;
5062   // SHL or SRL: look upstream for AND mask operand
5063   if (Opc == ISD::AND)
5064     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5065   else if (Opc0 == ISD::AND)
5066     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5067   if (!N1C)
5068     return false;
5069 
5070   unsigned MaskByteOffset;
5071   switch (N1C->getZExtValue()) {
5072   default:
5073     return false;
5074   case 0xFF:       MaskByteOffset = 0; break;
5075   case 0xFF00:     MaskByteOffset = 1; break;
5076   case 0xFFFF:
5077     // In case demanded bits didn't clear the bits that will be shifted out.
5078     // This is needed for X86.
5079     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5080       MaskByteOffset = 1;
5081       break;
5082     }
5083     return false;
5084   case 0xFF0000:   MaskByteOffset = 2; break;
5085   case 0xFF000000: MaskByteOffset = 3; break;
5086   }
5087 
5088   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5089   if (Opc == ISD::AND) {
5090     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5091       // (x >> 8) & 0xff
5092       // (x >> 8) & 0xff0000
5093       if (Opc0 != ISD::SRL)
5094         return false;
5095       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5096       if (!C || C->getZExtValue() != 8)
5097         return false;
5098     } else {
5099       // (x << 8) & 0xff00
5100       // (x << 8) & 0xff000000
5101       if (Opc0 != ISD::SHL)
5102         return false;
5103       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5104       if (!C || C->getZExtValue() != 8)
5105         return false;
5106     }
5107   } else if (Opc == ISD::SHL) {
5108     // (x & 0xff) << 8
5109     // (x & 0xff0000) << 8
5110     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5111       return false;
5112     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5113     if (!C || C->getZExtValue() != 8)
5114       return false;
5115   } else { // Opc == ISD::SRL
5116     // (x & 0xff00) >> 8
5117     // (x & 0xff000000) >> 8
5118     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5119       return false;
5120     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5121     if (!C || C->getZExtValue() != 8)
5122       return false;
5123   }
5124 
5125   if (Parts[MaskByteOffset])
5126     return false;
5127 
5128   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5129   return true;
5130 }
5131 
5132 /// Match a 32-bit packed halfword bswap. That is
5133 /// ((x & 0x000000ff) << 8) |
5134 /// ((x & 0x0000ff00) >> 8) |
5135 /// ((x & 0x00ff0000) << 8) |
5136 /// ((x & 0xff000000) >> 8)
5137 /// => (rotl (bswap x), 16)
5138 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5139   if (!LegalOperations)
5140     return SDValue();
5141 
5142   EVT VT = N->getValueType(0);
5143   if (VT != MVT::i32)
5144     return SDValue();
5145   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5146     return SDValue();
5147 
5148   // Look for either
5149   // (or (or (and), (and)), (or (and), (and)))
5150   // (or (or (or (and), (and)), (and)), (and))
5151   if (N0.getOpcode() != ISD::OR)
5152     return SDValue();
5153   SDValue N00 = N0.getOperand(0);
5154   SDValue N01 = N0.getOperand(1);
5155   SDNode *Parts[4] = {};
5156 
5157   if (N1.getOpcode() == ISD::OR &&
5158       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5159     // (or (or (and), (and)), (or (and), (and)))
5160     if (!isBSwapHWordElement(N00, Parts))
5161       return SDValue();
5162 
5163     if (!isBSwapHWordElement(N01, Parts))
5164       return SDValue();
5165     SDValue N10 = N1.getOperand(0);
5166     if (!isBSwapHWordElement(N10, Parts))
5167       return SDValue();
5168     SDValue N11 = N1.getOperand(1);
5169     if (!isBSwapHWordElement(N11, Parts))
5170       return SDValue();
5171   } else {
5172     // (or (or (or (and), (and)), (and)), (and))
5173     if (!isBSwapHWordElement(N1, Parts))
5174       return SDValue();
5175     if (!isBSwapHWordElement(N01, Parts))
5176       return SDValue();
5177     if (N00.getOpcode() != ISD::OR)
5178       return SDValue();
5179     SDValue N000 = N00.getOperand(0);
5180     if (!isBSwapHWordElement(N000, Parts))
5181       return SDValue();
5182     SDValue N001 = N00.getOperand(1);
5183     if (!isBSwapHWordElement(N001, Parts))
5184       return SDValue();
5185   }
5186 
5187   // Make sure the parts are all coming from the same node.
5188   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5189     return SDValue();
5190 
5191   SDLoc DL(N);
5192   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5193                               SDValue(Parts[0], 0));
5194 
5195   // Result of the bswap should be rotated by 16. If it's not legal, then
5196   // do  (x << 16) | (x >> 16).
5197   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5198   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5199     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5200   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5201     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5202   return DAG.getNode(ISD::OR, DL, VT,
5203                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5204                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5205 }
5206 
5207 /// This contains all DAGCombine rules which reduce two values combined by
5208 /// an Or operation to a single value \see visitANDLike().
5209 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5210   EVT VT = N1.getValueType();
5211   SDLoc DL(N);
5212 
5213   // fold (or x, undef) -> -1
5214   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5215     return DAG.getAllOnesConstant(DL, VT);
5216 
5217   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5218     return V;
5219 
5220   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5221   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5222       // Don't increase # computations.
5223       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5224     // We can only do this xform if we know that bits from X that are set in C2
5225     // but not in C1 are already zero.  Likewise for Y.
5226     if (const ConstantSDNode *N0O1C =
5227         getAsNonOpaqueConstant(N0.getOperand(1))) {
5228       if (const ConstantSDNode *N1O1C =
5229           getAsNonOpaqueConstant(N1.getOperand(1))) {
5230         // We can only do this xform if we know that bits from X that are set in
5231         // C2 but not in C1 are already zero.  Likewise for Y.
5232         const APInt &LHSMask = N0O1C->getAPIntValue();
5233         const APInt &RHSMask = N1O1C->getAPIntValue();
5234 
5235         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5236             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5237           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5238                                   N0.getOperand(0), N1.getOperand(0));
5239           return DAG.getNode(ISD::AND, DL, VT, X,
5240                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5241         }
5242       }
5243     }
5244   }
5245 
5246   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5247   if (N0.getOpcode() == ISD::AND &&
5248       N1.getOpcode() == ISD::AND &&
5249       N0.getOperand(0) == N1.getOperand(0) &&
5250       // Don't increase # computations.
5251       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5252     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5253                             N0.getOperand(1), N1.getOperand(1));
5254     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5255   }
5256 
5257   return SDValue();
5258 }
5259 
5260 SDValue DAGCombiner::visitOR(SDNode *N) {
5261   SDValue N0 = N->getOperand(0);
5262   SDValue N1 = N->getOperand(1);
5263   EVT VT = N1.getValueType();
5264 
5265   // x | x --> x
5266   if (N0 == N1)
5267     return N0;
5268 
5269   // fold vector ops
5270   if (VT.isVector()) {
5271     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5272       return FoldedVOp;
5273 
5274     // fold (or x, 0) -> x, vector edition
5275     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5276       return N1;
5277     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5278       return N0;
5279 
5280     // fold (or x, -1) -> -1, vector edition
5281     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5282       // do not return N0, because undef node may exist in N0
5283       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5284     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5285       // do not return N1, because undef node may exist in N1
5286       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5287 
5288     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5289     // Do this only if the resulting shuffle is legal.
5290     if (isa<ShuffleVectorSDNode>(N0) &&
5291         isa<ShuffleVectorSDNode>(N1) &&
5292         // Avoid folding a node with illegal type.
5293         TLI.isTypeLegal(VT)) {
5294       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5295       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5296       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5297       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5298       // Ensure both shuffles have a zero input.
5299       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5300         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5301         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5302         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5303         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5304         bool CanFold = true;
5305         int NumElts = VT.getVectorNumElements();
5306         SmallVector<int, 4> Mask(NumElts);
5307 
5308         for (int i = 0; i != NumElts; ++i) {
5309           int M0 = SV0->getMaskElt(i);
5310           int M1 = SV1->getMaskElt(i);
5311 
5312           // Determine if either index is pointing to a zero vector.
5313           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5314           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5315 
5316           // If one element is zero and the otherside is undef, keep undef.
5317           // This also handles the case that both are undef.
5318           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5319             Mask[i] = -1;
5320             continue;
5321           }
5322 
5323           // Make sure only one of the elements is zero.
5324           if (M0Zero == M1Zero) {
5325             CanFold = false;
5326             break;
5327           }
5328 
5329           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5330 
5331           // We have a zero and non-zero element. If the non-zero came from
5332           // SV0 make the index a LHS index. If it came from SV1, make it
5333           // a RHS index. We need to mod by NumElts because we don't care
5334           // which operand it came from in the original shuffles.
5335           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5336         }
5337 
5338         if (CanFold) {
5339           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5340           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5341 
5342           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5343           if (!LegalMask) {
5344             std::swap(NewLHS, NewRHS);
5345             ShuffleVectorSDNode::commuteMask(Mask);
5346             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5347           }
5348 
5349           if (LegalMask)
5350             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5351         }
5352       }
5353     }
5354   }
5355 
5356   // fold (or c1, c2) -> c1|c2
5357   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5358   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5359   if (N0C && N1C && !N1C->isOpaque())
5360     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5361   // canonicalize constant to RHS
5362   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5363      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5364     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5365   // fold (or x, 0) -> x
5366   if (isNullConstant(N1))
5367     return N0;
5368   // fold (or x, -1) -> -1
5369   if (isAllOnesConstant(N1))
5370     return N1;
5371 
5372   if (SDValue NewSel = foldBinOpIntoSelect(N))
5373     return NewSel;
5374 
5375   // fold (or x, c) -> c iff (x & ~c) == 0
5376   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5377     return N1;
5378 
5379   if (SDValue Combined = visitORLike(N0, N1, N))
5380     return Combined;
5381 
5382   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5383   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5384     return BSwap;
5385   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5386     return BSwap;
5387 
5388   // reassociate or
5389   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5390     return ROR;
5391 
5392   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5393   // iff (c1 & c2) != 0 or c1/c2 are undef.
5394   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5395     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5396   };
5397   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5398       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5399     if (SDValue COR = DAG.FoldConstantArithmetic(
5400             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5401       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5402       AddToWorklist(IOR.getNode());
5403       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5404     }
5405   }
5406 
5407   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5408   if (N0.getOpcode() == N1.getOpcode())
5409     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5410       return V;
5411 
5412   // See if this is some rotate idiom.
5413   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5414     return SDValue(Rot, 0);
5415 
5416   if (SDValue Load = MatchLoadCombine(N))
5417     return Load;
5418 
5419   // Simplify the operands using demanded-bits information.
5420   if (SimplifyDemandedBits(SDValue(N, 0)))
5421     return SDValue(N, 0);
5422 
5423   return SDValue();
5424 }
5425 
5426 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5427   if (Op.getOpcode() == ISD::AND &&
5428       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5429     Mask = Op.getOperand(1);
5430     return Op.getOperand(0);
5431   }
5432   return Op;
5433 }
5434 
5435 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5436 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5437                             SDValue &Mask) {
5438   Op = stripConstantMask(DAG, Op, Mask);
5439   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5440     Shift = Op;
5441     return true;
5442   }
5443   return false;
5444 }
5445 
5446 /// Helper function for visitOR to extract the needed side of a rotate idiom
5447 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5448 /// InstCombine merged some outside op with one of the shifts from
5449 /// the rotate pattern.
5450 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5451 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5452 /// patterns:
5453 ///
5454 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5455 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5456 ///
5457 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5458 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5459 ///
5460 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5461 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5462 ///
5463 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5464 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5465 ///
5466 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5467 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5468                                      SDValue ExtractFrom, SDValue &Mask,
5469                                      const SDLoc &DL) {
5470   assert(OppShift && ExtractFrom && "Empty SDValue");
5471   assert(
5472       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5473       "Existing shift must be valid as a rotate half");
5474 
5475   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5476   // Preconditions:
5477   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5478   //
5479   // Find opcode of the needed shift to be extracted from (op0 v c0).
5480   unsigned Opcode = ISD::DELETED_NODE;
5481   bool IsMulOrDiv = false;
5482   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5483   // opcode or its arithmetic (mul or udiv) variant.
5484   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5485     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5486     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5487       return false;
5488     Opcode = NeededShift;
5489     return true;
5490   };
5491   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5492   // that the needed shift can be extracted from.
5493   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5494       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5495     return SDValue();
5496 
5497   // op0 must be the same opcode on both sides, have the same LHS argument,
5498   // and produce the same value type.
5499   SDValue OppShiftLHS = OppShift.getOperand(0);
5500   EVT ShiftedVT = OppShiftLHS.getValueType();
5501   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5502       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5503       ShiftedVT != ExtractFrom.getValueType())
5504     return SDValue();
5505 
5506   // Amount of the existing shift.
5507   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5508   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5509   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5510   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5511   ConstantSDNode *ExtractFromCst =
5512       isConstOrConstSplat(ExtractFrom.getOperand(1));
5513   // TODO: We should be able to handle non-uniform constant vectors for these values
5514   // Check that we have constant values.
5515   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5516       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5517       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5518     return SDValue();
5519 
5520   // Compute the shift amount we need to extract to complete the rotate.
5521   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5522   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5523     return SDValue();
5524   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5525   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5526   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5527   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5528   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5529 
5530   // Now try extract the needed shift from the ExtractFrom op and see if the
5531   // result matches up with the existing shift's LHS op.
5532   if (IsMulOrDiv) {
5533     // Op to extract from is a mul or udiv by a constant.
5534     // Check:
5535     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5536     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5537     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5538                                                  NeededShiftAmt.getZExtValue());
5539     APInt ResultAmt;
5540     APInt Rem;
5541     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5542     if (Rem != 0 || ResultAmt != OppLHSAmt)
5543       return SDValue();
5544   } else {
5545     // Op to extract from is a shift by a constant.
5546     // Check:
5547     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5548     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5549                                           ExtractFromAmt.getBitWidth()))
5550       return SDValue();
5551   }
5552 
5553   // Return the expanded shift op that should allow a rotate to be formed.
5554   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5555   EVT ResVT = ExtractFrom.getValueType();
5556   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5557   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5558 }
5559 
5560 // Return true if we can prove that, whenever Neg and Pos are both in the
5561 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5562 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5563 //
5564 //     (or (shift1 X, Neg), (shift2 X, Pos))
5565 //
5566 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5567 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5568 // to consider shift amounts with defined behavior.
5569 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5570                            SelectionDAG &DAG) {
5571   // If EltSize is a power of 2 then:
5572   //
5573   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5574   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5575   //
5576   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5577   // for the stronger condition:
5578   //
5579   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
5580   //
5581   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5582   // we can just replace Neg with Neg' for the rest of the function.
5583   //
5584   // In other cases we check for the even stronger condition:
5585   //
5586   //     Neg == EltSize - Pos                                    [B]
5587   //
5588   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
5589   // behavior if Pos == 0 (and consequently Neg == EltSize).
5590   //
5591   // We could actually use [A] whenever EltSize is a power of 2, but the
5592   // only extra cases that it would match are those uninteresting ones
5593   // where Neg and Pos are never in range at the same time.  E.g. for
5594   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5595   // as well as (sub 32, Pos), but:
5596   //
5597   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5598   //
5599   // always invokes undefined behavior for 32-bit X.
5600   //
5601   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5602   unsigned MaskLoBits = 0;
5603   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5604     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5605       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5606       unsigned Bits = Log2_64(EltSize);
5607       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5608           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5609         Neg = Neg.getOperand(0);
5610         MaskLoBits = Bits;
5611       }
5612     }
5613   }
5614 
5615   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5616   if (Neg.getOpcode() != ISD::SUB)
5617     return false;
5618   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5619   if (!NegC)
5620     return false;
5621   SDValue NegOp1 = Neg.getOperand(1);
5622 
5623   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5624   // Pos'.  The truncation is redundant for the purpose of the equality.
5625   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5626     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5627       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5628       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5629           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5630            MaskLoBits))
5631         Pos = Pos.getOperand(0);
5632     }
5633   }
5634 
5635   // The condition we need is now:
5636   //
5637   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5638   //
5639   // If NegOp1 == Pos then we need:
5640   //
5641   //              EltSize & Mask == NegC & Mask
5642   //
5643   // (because "x & Mask" is a truncation and distributes through subtraction).
5644   APInt Width;
5645   if (Pos == NegOp1)
5646     Width = NegC->getAPIntValue();
5647 
5648   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5649   // Then the condition we want to prove becomes:
5650   //
5651   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5652   //
5653   // which, again because "x & Mask" is a truncation, becomes:
5654   //
5655   //                NegC & Mask == (EltSize - PosC) & Mask
5656   //             EltSize & Mask == (NegC + PosC) & Mask
5657   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5658     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5659       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5660     else
5661       return false;
5662   } else
5663     return false;
5664 
5665   // Now we just need to check that EltSize & Mask == Width & Mask.
5666   if (MaskLoBits)
5667     // EltSize & Mask is 0 since Mask is EltSize - 1.
5668     return Width.getLoBits(MaskLoBits) == 0;
5669   return Width == EltSize;
5670 }
5671 
5672 // A subroutine of MatchRotate used once we have found an OR of two opposite
5673 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5674 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5675 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5676 // Neg with outer conversions stripped away.
5677 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5678                                        SDValue Neg, SDValue InnerPos,
5679                                        SDValue InnerNeg, unsigned PosOpcode,
5680                                        unsigned NegOpcode, const SDLoc &DL) {
5681   // fold (or (shl x, (*ext y)),
5682   //          (srl x, (*ext (sub 32, y)))) ->
5683   //   (rotl x, y) or (rotr x, (sub 32, y))
5684   //
5685   // fold (or (shl x, (*ext (sub 32, y))),
5686   //          (srl x, (*ext y))) ->
5687   //   (rotr x, y) or (rotl x, (sub 32, y))
5688   EVT VT = Shifted.getValueType();
5689   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5690     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5691     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5692                        HasPos ? Pos : Neg).getNode();
5693   }
5694 
5695   return nullptr;
5696 }
5697 
5698 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
5699 // idioms for rotate, and if the target supports rotation instructions, generate
5700 // a rot[lr].
5701 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5702   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
5703   EVT VT = LHS.getValueType();
5704   if (!TLI.isTypeLegal(VT)) return nullptr;
5705 
5706   // The target must have at least one rotate flavor.
5707   bool HasROTL = hasOperation(ISD::ROTL, VT);
5708   bool HasROTR = hasOperation(ISD::ROTR, VT);
5709   if (!HasROTL && !HasROTR) return nullptr;
5710 
5711   // Check for truncated rotate.
5712   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5713       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5714     assert(LHS.getValueType() == RHS.getValueType());
5715     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5716       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5717                          SDValue(Rot, 0)).getNode();
5718     }
5719   }
5720 
5721   // Match "(X shl/srl V1) & V2" where V2 may not be present.
5722   SDValue LHSShift;   // The shift.
5723   SDValue LHSMask;    // AND value if any.
5724   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5725 
5726   SDValue RHSShift;   // The shift.
5727   SDValue RHSMask;    // AND value if any.
5728   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5729 
5730   // If neither side matched a rotate half, bail
5731   if (!LHSShift && !RHSShift)
5732     return nullptr;
5733 
5734   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5735   // side of the rotate, so try to handle that here. In all cases we need to
5736   // pass the matched shift from the opposite side to compute the opcode and
5737   // needed shift amount to extract.  We still want to do this if both sides
5738   // matched a rotate half because one half may be a potential overshift that
5739   // can be broken down (ie if InstCombine merged two shl or srl ops into a
5740   // single one).
5741 
5742   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5743   if (LHSShift)
5744     if (SDValue NewRHSShift =
5745             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5746       RHSShift = NewRHSShift;
5747   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5748   if (RHSShift)
5749     if (SDValue NewLHSShift =
5750             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5751       LHSShift = NewLHSShift;
5752 
5753   // If a side is still missing, nothing else we can do.
5754   if (!RHSShift || !LHSShift)
5755     return nullptr;
5756 
5757   // At this point we've matched or extracted a shift op on each side.
5758 
5759   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5760     return nullptr;   // Not shifting the same value.
5761 
5762   if (LHSShift.getOpcode() == RHSShift.getOpcode())
5763     return nullptr;   // Shifts must disagree.
5764 
5765   // Canonicalize shl to left side in a shl/srl pair.
5766   if (RHSShift.getOpcode() == ISD::SHL) {
5767     std::swap(LHS, RHS);
5768     std::swap(LHSShift, RHSShift);
5769     std::swap(LHSMask, RHSMask);
5770   }
5771 
5772   unsigned EltSizeInBits = VT.getScalarSizeInBits();
5773   SDValue LHSShiftArg = LHSShift.getOperand(0);
5774   SDValue LHSShiftAmt = LHSShift.getOperand(1);
5775   SDValue RHSShiftArg = RHSShift.getOperand(0);
5776   SDValue RHSShiftAmt = RHSShift.getOperand(1);
5777 
5778   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5779   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
5780   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5781                                         ConstantSDNode *RHS) {
5782     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5783   };
5784   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5785     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5786                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5787 
5788     // If there is an AND of either shifted operand, apply it to the result.
5789     if (LHSMask.getNode() || RHSMask.getNode()) {
5790       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5791       SDValue Mask = AllOnes;
5792 
5793       if (LHSMask.getNode()) {
5794         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5795         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5796                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5797       }
5798       if (RHSMask.getNode()) {
5799         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5800         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5801                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5802       }
5803 
5804       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5805     }
5806 
5807     return Rot.getNode();
5808   }
5809 
5810   // If there is a mask here, and we have a variable shift, we can't be sure
5811   // that we're masking out the right stuff.
5812   if (LHSMask.getNode() || RHSMask.getNode())
5813     return nullptr;
5814 
5815   // If the shift amount is sign/zext/any-extended just peel it off.
5816   SDValue LExtOp0 = LHSShiftAmt;
5817   SDValue RExtOp0 = RHSShiftAmt;
5818   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5819        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5820        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5821        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5822       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5823        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5824        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5825        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5826     LExtOp0 = LHSShiftAmt.getOperand(0);
5827     RExtOp0 = RHSShiftAmt.getOperand(0);
5828   }
5829 
5830   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5831                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5832   if (TryL)
5833     return TryL;
5834 
5835   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5836                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5837   if (TryR)
5838     return TryR;
5839 
5840   return nullptr;
5841 }
5842 
5843 namespace {
5844 
5845 /// Represents known origin of an individual byte in load combine pattern. The
5846 /// value of the byte is either constant zero or comes from memory.
5847 struct ByteProvider {
5848   // For constant zero providers Load is set to nullptr. For memory providers
5849   // Load represents the node which loads the byte from memory.
5850   // ByteOffset is the offset of the byte in the value produced by the load.
5851   LoadSDNode *Load = nullptr;
5852   unsigned ByteOffset = 0;
5853 
5854   ByteProvider() = default;
5855 
5856   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
5857     return ByteProvider(Load, ByteOffset);
5858   }
5859 
5860   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
5861 
5862   bool isConstantZero() const { return !Load; }
5863   bool isMemory() const { return Load; }
5864 
5865   bool operator==(const ByteProvider &Other) const {
5866     return Other.Load == Load && Other.ByteOffset == ByteOffset;
5867   }
5868 
5869 private:
5870   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
5871       : Load(Load), ByteOffset(ByteOffset) {}
5872 };
5873 
5874 } // end anonymous namespace
5875 
5876 /// Recursively traverses the expression calculating the origin of the requested
5877 /// byte of the given value. Returns None if the provider can't be calculated.
5878 ///
5879 /// For all the values except the root of the expression verifies that the value
5880 /// has exactly one use and if it's not true return None. This way if the origin
5881 /// of the byte is returned it's guaranteed that the values which contribute to
5882 /// the byte are not used outside of this expression.
5883 ///
5884 /// Because the parts of the expression are not allowed to have more than one
5885 /// use this function iterates over trees, not DAGs. So it never visits the same
5886 /// node more than once.
5887 static const Optional<ByteProvider>
5888 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
5889                       bool Root = false) {
5890   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
5891   if (Depth == 10)
5892     return None;
5893 
5894   if (!Root && !Op.hasOneUse())
5895     return None;
5896 
5897   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
5898   unsigned BitWidth = Op.getValueSizeInBits();
5899   if (BitWidth % 8 != 0)
5900     return None;
5901   unsigned ByteWidth = BitWidth / 8;
5902   assert(Index < ByteWidth && "invalid index requested");
5903   (void) ByteWidth;
5904 
5905   switch (Op.getOpcode()) {
5906   case ISD::OR: {
5907     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
5908     if (!LHS)
5909       return None;
5910     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
5911     if (!RHS)
5912       return None;
5913 
5914     if (LHS->isConstantZero())
5915       return RHS;
5916     if (RHS->isConstantZero())
5917       return LHS;
5918     return None;
5919   }
5920   case ISD::SHL: {
5921     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
5922     if (!ShiftOp)
5923       return None;
5924 
5925     uint64_t BitShift = ShiftOp->getZExtValue();
5926     if (BitShift % 8 != 0)
5927       return None;
5928     uint64_t ByteShift = BitShift / 8;
5929 
5930     return Index < ByteShift
5931                ? ByteProvider::getConstantZero()
5932                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
5933                                        Depth + 1);
5934   }
5935   case ISD::ANY_EXTEND:
5936   case ISD::SIGN_EXTEND:
5937   case ISD::ZERO_EXTEND: {
5938     SDValue NarrowOp = Op->getOperand(0);
5939     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
5940     if (NarrowBitWidth % 8 != 0)
5941       return None;
5942     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5943 
5944     if (Index >= NarrowByteWidth)
5945       return Op.getOpcode() == ISD::ZERO_EXTEND
5946                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5947                  : None;
5948     return calculateByteProvider(NarrowOp, Index, Depth + 1);
5949   }
5950   case ISD::BSWAP:
5951     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
5952                                  Depth + 1);
5953   case ISD::LOAD: {
5954     auto L = cast<LoadSDNode>(Op.getNode());
5955     if (L->isVolatile() || L->isIndexed())
5956       return None;
5957 
5958     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
5959     if (NarrowBitWidth % 8 != 0)
5960       return None;
5961     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
5962 
5963     if (Index >= NarrowByteWidth)
5964       return L->getExtensionType() == ISD::ZEXTLOAD
5965                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
5966                  : None;
5967     return ByteProvider::getMemory(L, Index);
5968   }
5969   }
5970 
5971   return None;
5972 }
5973 
5974 /// Match a pattern where a wide type scalar value is loaded by several narrow
5975 /// loads and combined by shifts and ors. Fold it into a single load or a load
5976 /// and a BSWAP if the targets supports it.
5977 ///
5978 /// Assuming little endian target:
5979 ///  i8 *a = ...
5980 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
5981 /// =>
5982 ///  i32 val = *((i32)a)
5983 ///
5984 ///  i8 *a = ...
5985 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
5986 /// =>
5987 ///  i32 val = BSWAP(*((i32)a))
5988 ///
5989 /// TODO: This rule matches complex patterns with OR node roots and doesn't
5990 /// interact well with the worklist mechanism. When a part of the pattern is
5991 /// updated (e.g. one of the loads) its direct users are put into the worklist,
5992 /// but the root node of the pattern which triggers the load combine is not
5993 /// necessarily a direct user of the changed node. For example, once the address
5994 /// of t28 load is reassociated load combine won't be triggered:
5995 ///             t25: i32 = add t4, Constant:i32<2>
5996 ///           t26: i64 = sign_extend t25
5997 ///        t27: i64 = add t2, t26
5998 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
5999 ///     t29: i32 = zero_extend t28
6000 ///   t32: i32 = shl t29, Constant:i8<8>
6001 /// t33: i32 = or t23, t32
6002 /// As a possible fix visitLoad can check if the load can be a part of a load
6003 /// combine pattern and add corresponding OR roots to the worklist.
6004 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6005   assert(N->getOpcode() == ISD::OR &&
6006          "Can only match load combining against OR nodes");
6007 
6008   // Handles simple types only
6009   EVT VT = N->getValueType(0);
6010   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6011     return SDValue();
6012   unsigned ByteWidth = VT.getSizeInBits() / 8;
6013 
6014   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6015   // Before legalize we can introduce too wide illegal loads which will be later
6016   // split into legal sized loads. This enables us to combine i64 load by i8
6017   // patterns to a couple of i32 loads on 32 bit targets.
6018   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6019     return SDValue();
6020 
6021   std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
6022     unsigned BW, unsigned i) { return i; };
6023   std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
6024     unsigned BW, unsigned i) { return BW - i - 1; };
6025 
6026   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6027   auto MemoryByteOffset = [&] (ByteProvider P) {
6028     assert(P.isMemory() && "Must be a memory byte provider");
6029     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6030     assert(LoadBitWidth % 8 == 0 &&
6031            "can only analyze providers for individual bytes not bit");
6032     unsigned LoadByteWidth = LoadBitWidth / 8;
6033     return IsBigEndianTarget
6034             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6035             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6036   };
6037 
6038   Optional<BaseIndexOffset> Base;
6039   SDValue Chain;
6040 
6041   SmallPtrSet<LoadSDNode *, 8> Loads;
6042   Optional<ByteProvider> FirstByteProvider;
6043   int64_t FirstOffset = INT64_MAX;
6044 
6045   // Check if all the bytes of the OR we are looking at are loaded from the same
6046   // base address. Collect bytes offsets from Base address in ByteOffsets.
6047   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6048   for (unsigned i = 0; i < ByteWidth; i++) {
6049     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6050     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6051       return SDValue();
6052 
6053     LoadSDNode *L = P->Load;
6054     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6055            "Must be enforced by calculateByteProvider");
6056     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6057 
6058     // All loads must share the same chain
6059     SDValue LChain = L->getChain();
6060     if (!Chain)
6061       Chain = LChain;
6062     else if (Chain != LChain)
6063       return SDValue();
6064 
6065     // Loads must share the same base address
6066     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6067     int64_t ByteOffsetFromBase = 0;
6068     if (!Base)
6069       Base = Ptr;
6070     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6071       return SDValue();
6072 
6073     // Calculate the offset of the current byte from the base address
6074     ByteOffsetFromBase += MemoryByteOffset(*P);
6075     ByteOffsets[i] = ByteOffsetFromBase;
6076 
6077     // Remember the first byte load
6078     if (ByteOffsetFromBase < FirstOffset) {
6079       FirstByteProvider = P;
6080       FirstOffset = ByteOffsetFromBase;
6081     }
6082 
6083     Loads.insert(L);
6084   }
6085   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6086          "memory, so there must be at least one load which produces the value");
6087   assert(Base && "Base address of the accessed memory location must be set");
6088   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6089 
6090   // Check if the bytes of the OR we are looking at match with either big or
6091   // little endian value load
6092   bool BigEndian = true, LittleEndian = true;
6093   for (unsigned i = 0; i < ByteWidth; i++) {
6094     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6095     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
6096     BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
6097     if (!BigEndian && !LittleEndian)
6098       return SDValue();
6099   }
6100   assert((BigEndian != LittleEndian) && "should be either or");
6101   assert(FirstByteProvider && "must be set");
6102 
6103   // Ensure that the first byte is loaded from zero offset of the first load.
6104   // So the combined value can be loaded from the first load address.
6105   if (MemoryByteOffset(*FirstByteProvider) != 0)
6106     return SDValue();
6107   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6108 
6109   // The node we are looking at matches with the pattern, check if we can
6110   // replace it with a single load and bswap if needed.
6111 
6112   // If the load needs byte swap check if the target supports it
6113   bool NeedsBswap = IsBigEndianTarget != BigEndian;
6114 
6115   // Before legalize we can introduce illegal bswaps which will be later
6116   // converted to an explicit bswap sequence. This way we end up with a single
6117   // load and byte shuffling instead of several loads and byte shuffling.
6118   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6119     return SDValue();
6120 
6121   // Check that a load of the wide type is both allowed and fast on the target
6122   bool Fast = false;
6123   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6124                                         VT, FirstLoad->getAddressSpace(),
6125                                         FirstLoad->getAlignment(), &Fast);
6126   if (!Allowed || !Fast)
6127     return SDValue();
6128 
6129   SDValue NewLoad =
6130       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6131                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6132 
6133   // Transfer chain users from old loads to the new load.
6134   for (LoadSDNode *L : Loads)
6135     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6136 
6137   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6138 }
6139 
6140 // If the target has andn, bsl, or a similar bit-select instruction,
6141 // we want to unfold masked merge, with canonical pattern of:
6142 //   |        A  |  |B|
6143 //   ((x ^ y) & m) ^ y
6144 //    |  D  |
6145 // Into:
6146 //   (x & m) | (y & ~m)
6147 // If y is a constant, and the 'andn' does not work with immediates,
6148 // we unfold into a different pattern:
6149 //   ~(~x & m) & (m | y)
6150 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6151 //       the very least that breaks andnpd / andnps patterns, and because those
6152 //       patterns are simplified in IR and shouldn't be created in the DAG
6153 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6154   assert(N->getOpcode() == ISD::XOR);
6155 
6156   // Don't touch 'not' (i.e. where y = -1).
6157   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6158     return SDValue();
6159 
6160   EVT VT = N->getValueType(0);
6161 
6162   // There are 3 commutable operators in the pattern,
6163   // so we have to deal with 8 possible variants of the basic pattern.
6164   SDValue X, Y, M;
6165   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6166     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6167       return false;
6168     SDValue Xor = And.getOperand(XorIdx);
6169     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6170       return false;
6171     SDValue Xor0 = Xor.getOperand(0);
6172     SDValue Xor1 = Xor.getOperand(1);
6173     // Don't touch 'not' (i.e. where y = -1).
6174     if (isAllOnesOrAllOnesSplat(Xor1))
6175       return false;
6176     if (Other == Xor0)
6177       std::swap(Xor0, Xor1);
6178     if (Other != Xor1)
6179       return false;
6180     X = Xor0;
6181     Y = Xor1;
6182     M = And.getOperand(XorIdx ? 0 : 1);
6183     return true;
6184   };
6185 
6186   SDValue N0 = N->getOperand(0);
6187   SDValue N1 = N->getOperand(1);
6188   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6189       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6190     return SDValue();
6191 
6192   // Don't do anything if the mask is constant. This should not be reachable.
6193   // InstCombine should have already unfolded this pattern, and DAGCombiner
6194   // probably shouldn't produce it, too.
6195   if (isa<ConstantSDNode>(M.getNode()))
6196     return SDValue();
6197 
6198   // We can transform if the target has AndNot
6199   if (!TLI.hasAndNot(M))
6200     return SDValue();
6201 
6202   SDLoc DL(N);
6203 
6204   // If Y is a constant, check that 'andn' works with immediates.
6205   if (!TLI.hasAndNot(Y)) {
6206     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6207     // If not, we need to do a bit more work to make sure andn is still used.
6208     SDValue NotX = DAG.getNOT(DL, X, VT);
6209     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6210     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6211     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6212     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6213   }
6214 
6215   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6216   SDValue NotM = DAG.getNOT(DL, M, VT);
6217   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6218 
6219   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6220 }
6221 
6222 SDValue DAGCombiner::visitXOR(SDNode *N) {
6223   SDValue N0 = N->getOperand(0);
6224   SDValue N1 = N->getOperand(1);
6225   EVT VT = N0.getValueType();
6226 
6227   // fold vector ops
6228   if (VT.isVector()) {
6229     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6230       return FoldedVOp;
6231 
6232     // fold (xor x, 0) -> x, vector edition
6233     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6234       return N1;
6235     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6236       return N0;
6237   }
6238 
6239   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6240   SDLoc DL(N);
6241   if (N0.isUndef() && N1.isUndef())
6242     return DAG.getConstant(0, DL, VT);
6243   // fold (xor x, undef) -> undef
6244   if (N0.isUndef())
6245     return N0;
6246   if (N1.isUndef())
6247     return N1;
6248   // fold (xor c1, c2) -> c1^c2
6249   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6250   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6251   if (N0C && N1C)
6252     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6253   // canonicalize constant to RHS
6254   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6255      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6256     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6257   // fold (xor x, 0) -> x
6258   if (isNullConstant(N1))
6259     return N0;
6260 
6261   if (SDValue NewSel = foldBinOpIntoSelect(N))
6262     return NewSel;
6263 
6264   // reassociate xor
6265   if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6266     return RXOR;
6267 
6268   // fold !(x cc y) -> (x !cc y)
6269   unsigned N0Opcode = N0.getOpcode();
6270   SDValue LHS, RHS, CC;
6271   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6272     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6273                                                LHS.getValueType().isInteger());
6274     if (!LegalOperations ||
6275         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6276       switch (N0Opcode) {
6277       default:
6278         llvm_unreachable("Unhandled SetCC Equivalent!");
6279       case ISD::SETCC:
6280         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6281       case ISD::SELECT_CC:
6282         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6283                                N0.getOperand(3), NotCC);
6284       }
6285     }
6286   }
6287 
6288   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6289   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6290       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6291     SDValue V = N0.getOperand(0);
6292     SDLoc DL0(N0);
6293     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6294                     DAG.getConstant(1, DL0, V.getValueType()));
6295     AddToWorklist(V.getNode());
6296     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6297   }
6298 
6299   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6300   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6301       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6302     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6303     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6304       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6305       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6306       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6307       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6308       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6309     }
6310   }
6311   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6312   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6313       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6314     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6315     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6316       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6317       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6318       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6319       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6320       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6321     }
6322   }
6323   // fold (xor (and x, y), y) -> (and (not x), y)
6324   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6325     SDValue X = N0.getOperand(0);
6326     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6327     AddToWorklist(NotX.getNode());
6328     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6329   }
6330 
6331   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6332     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6333     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6334     unsigned BitWidth = VT.getScalarSizeInBits();
6335     if (XorC && ShiftC) {
6336       // Don't crash on an oversized shift. We can not guarantee that a bogus
6337       // shift has been simplified to undef.
6338       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6339       if (ShiftAmt < BitWidth) {
6340         APInt Ones = APInt::getAllOnesValue(BitWidth);
6341         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6342         if (XorC->getAPIntValue() == Ones) {
6343           // If the xor constant is a shifted -1, do a 'not' before the shift:
6344           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6345           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6346           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6347           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6348         }
6349       }
6350     }
6351   }
6352 
6353   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6354   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6355     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6356     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6357     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6358       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6359       SDValue S0 = S.getOperand(0);
6360       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6361         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6362         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6363           if (C->getAPIntValue() == (OpSizeInBits - 1))
6364             return DAG.getNode(ISD::ABS, DL, VT, S0);
6365       }
6366     }
6367   }
6368 
6369   // fold (xor x, x) -> 0
6370   if (N0 == N1)
6371     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6372 
6373   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6374   // Here is a concrete example of this equivalence:
6375   // i16   x ==  14
6376   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
6377   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6378   //
6379   // =>
6380   //
6381   // i16     ~1      == 0b1111111111111110
6382   // i16 rol(~1, 14) == 0b1011111111111111
6383   //
6384   // Some additional tips to help conceptualize this transform:
6385   // - Try to see the operation as placing a single zero in a value of all ones.
6386   // - There exists no value for x which would allow the result to contain zero.
6387   // - Values of x larger than the bitwidth are undefined and do not require a
6388   //   consistent result.
6389   // - Pushing the zero left requires shifting one bits in from the right.
6390   // A rotate left of ~1 is a nice way of achieving the desired result.
6391   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6392       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6393     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6394                        N0.getOperand(1));
6395   }
6396 
6397   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
6398   if (N0Opcode == N1.getOpcode())
6399     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6400       return V;
6401 
6402   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
6403   if (SDValue MM = unfoldMaskedMerge(N))
6404     return MM;
6405 
6406   // Simplify the expression using non-local knowledge.
6407   if (SimplifyDemandedBits(SDValue(N, 0)))
6408     return SDValue(N, 0);
6409 
6410   return SDValue();
6411 }
6412 
6413 /// Handle transforms common to the three shifts, when the shift amount is a
6414 /// constant.
6415 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6416   // Do not turn a 'not' into a regular xor.
6417   if (isBitwiseNot(N->getOperand(0)))
6418     return SDValue();
6419 
6420   SDNode *LHS = N->getOperand(0).getNode();
6421   if (!LHS->hasOneUse()) return SDValue();
6422 
6423   // We want to pull some binops through shifts, so that we have (and (shift))
6424   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
6425   // thing happens with address calculations, so it's important to canonicalize
6426   // it.
6427   bool HighBitSet = false;  // Can we transform this if the high bit is set?
6428 
6429   switch (LHS->getOpcode()) {
6430   default: return SDValue();
6431   case ISD::OR:
6432   case ISD::XOR:
6433     HighBitSet = false; // We can only transform sra if the high bit is clear.
6434     break;
6435   case ISD::AND:
6436     HighBitSet = true;  // We can only transform sra if the high bit is set.
6437     break;
6438   case ISD::ADD:
6439     if (N->getOpcode() != ISD::SHL)
6440       return SDValue(); // only shl(add) not sr[al](add).
6441     HighBitSet = false; // We can only transform sra if the high bit is clear.
6442     break;
6443   }
6444 
6445   // We require the RHS of the binop to be a constant and not opaque as well.
6446   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6447   if (!BinOpCst) return SDValue();
6448 
6449   // FIXME: disable this unless the input to the binop is a shift by a constant
6450   // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
6451   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6452   bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6453                  BinOpLHSVal->getOpcode() == ISD::SRA ||
6454                  BinOpLHSVal->getOpcode() == ISD::SRL;
6455   bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6456                         BinOpLHSVal->getOpcode() == ISD::SELECT;
6457 
6458   if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6459       !isCopyOrSelect)
6460     return SDValue();
6461 
6462   if (isCopyOrSelect && N->hasOneUse())
6463     return SDValue();
6464 
6465   EVT VT = N->getValueType(0);
6466 
6467   // If this is a signed shift right, and the high bit is modified by the
6468   // logical operation, do not perform the transformation. The highBitSet
6469   // boolean indicates the value of the high bit of the constant which would
6470   // cause it to be modified for this operation.
6471   if (N->getOpcode() == ISD::SRA) {
6472     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6473     if (BinOpRHSSignSet != HighBitSet)
6474       return SDValue();
6475   }
6476 
6477   if (!TLI.isDesirableToCommuteWithShift(N, Level))
6478     return SDValue();
6479 
6480   // Fold the constants, shifting the binop RHS by the shift amount.
6481   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6482                                N->getValueType(0),
6483                                LHS->getOperand(1), N->getOperand(1));
6484   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6485 
6486   // Create the new shift.
6487   SDValue NewShift = DAG.getNode(N->getOpcode(),
6488                                  SDLoc(LHS->getOperand(0)),
6489                                  VT, LHS->getOperand(0), N->getOperand(1));
6490 
6491   // Create the new binop.
6492   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6493 }
6494 
6495 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6496   assert(N->getOpcode() == ISD::TRUNCATE);
6497   assert(N->getOperand(0).getOpcode() == ISD::AND);
6498 
6499   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6500   EVT TruncVT = N->getValueType(0);
6501   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
6502       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
6503     SDValue N01 = N->getOperand(0).getOperand(1);
6504     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6505       SDLoc DL(N);
6506       SDValue N00 = N->getOperand(0).getOperand(0);
6507       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6508       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6509       AddToWorklist(Trunc00.getNode());
6510       AddToWorklist(Trunc01.getNode());
6511       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6512     }
6513   }
6514 
6515   return SDValue();
6516 }
6517 
6518 SDValue DAGCombiner::visitRotate(SDNode *N) {
6519   SDLoc dl(N);
6520   SDValue N0 = N->getOperand(0);
6521   SDValue N1 = N->getOperand(1);
6522   EVT VT = N->getValueType(0);
6523   unsigned Bitsize = VT.getScalarSizeInBits();
6524 
6525   // fold (rot x, 0) -> x
6526   if (isNullOrNullSplat(N1))
6527     return N0;
6528 
6529   // fold (rot x, c) -> x iff (c % BitSize) == 0
6530   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6531     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6532     if (DAG.MaskedValueIsZero(N1, ModuloMask))
6533       return N0;
6534   }
6535 
6536   // fold (rot x, c) -> (rot x, c % BitSize)
6537   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6538     if (Cst->getAPIntValue().uge(Bitsize)) {
6539       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6540       return DAG.getNode(N->getOpcode(), dl, VT, N0,
6541                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
6542     }
6543   }
6544 
6545   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6546   if (N1.getOpcode() == ISD::TRUNCATE &&
6547       N1.getOperand(0).getOpcode() == ISD::AND) {
6548     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6549       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6550   }
6551 
6552   unsigned NextOp = N0.getOpcode();
6553   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
6554   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6555     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6556     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6557     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6558       EVT ShiftVT = C1->getValueType(0);
6559       bool SameSide = (N->getOpcode() == NextOp);
6560       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6561       if (SDValue CombinedShift =
6562               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6563         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6564         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6565             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6566             BitsizeC.getNode());
6567         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6568                            CombinedShiftNorm);
6569       }
6570     }
6571   }
6572   return SDValue();
6573 }
6574 
6575 SDValue DAGCombiner::visitSHL(SDNode *N) {
6576   SDValue N0 = N->getOperand(0);
6577   SDValue N1 = N->getOperand(1);
6578   if (SDValue V = DAG.simplifyShift(N0, N1))
6579     return V;
6580 
6581   EVT VT = N0.getValueType();
6582   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6583 
6584   // fold vector ops
6585   if (VT.isVector()) {
6586     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6587       return FoldedVOp;
6588 
6589     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6590     // If setcc produces all-one true value then:
6591     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6592     if (N1CV && N1CV->isConstant()) {
6593       if (N0.getOpcode() == ISD::AND) {
6594         SDValue N00 = N0->getOperand(0);
6595         SDValue N01 = N0->getOperand(1);
6596         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6597 
6598         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6599             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6600                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6601           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6602                                                      N01CV, N1CV))
6603             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6604         }
6605       }
6606     }
6607   }
6608 
6609   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6610 
6611   // fold (shl c1, c2) -> c1<<c2
6612   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6613   if (N0C && N1C && !N1C->isOpaque())
6614     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6615 
6616   if (SDValue NewSel = foldBinOpIntoSelect(N))
6617     return NewSel;
6618 
6619   // if (shl x, c) is known to be zero, return 0
6620   if (DAG.MaskedValueIsZero(SDValue(N, 0),
6621                             APInt::getAllOnesValue(OpSizeInBits)))
6622     return DAG.getConstant(0, SDLoc(N), VT);
6623   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6624   if (N1.getOpcode() == ISD::TRUNCATE &&
6625       N1.getOperand(0).getOpcode() == ISD::AND) {
6626     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6627       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6628   }
6629 
6630   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6631     return SDValue(N, 0);
6632 
6633   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
6634   if (N0.getOpcode() == ISD::SHL) {
6635     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6636                                           ConstantSDNode *RHS) {
6637       APInt c1 = LHS->getAPIntValue();
6638       APInt c2 = RHS->getAPIntValue();
6639       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6640       return (c1 + c2).uge(OpSizeInBits);
6641     };
6642     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6643       return DAG.getConstant(0, SDLoc(N), VT);
6644 
6645     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6646                                        ConstantSDNode *RHS) {
6647       APInt c1 = LHS->getAPIntValue();
6648       APInt c2 = RHS->getAPIntValue();
6649       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6650       return (c1 + c2).ult(OpSizeInBits);
6651     };
6652     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6653       SDLoc DL(N);
6654       EVT ShiftVT = N1.getValueType();
6655       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6656       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6657     }
6658   }
6659 
6660   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6661   // For this to be valid, the second form must not preserve any of the bits
6662   // that are shifted out by the inner shift in the first form.  This means
6663   // the outer shift size must be >= the number of bits added by the ext.
6664   // As a corollary, we don't care what kind of ext it is.
6665   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6666               N0.getOpcode() == ISD::ANY_EXTEND ||
6667               N0.getOpcode() == ISD::SIGN_EXTEND) &&
6668       N0.getOperand(0).getOpcode() == ISD::SHL) {
6669     SDValue N0Op0 = N0.getOperand(0);
6670     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6671       APInt c1 = N0Op0C1->getAPIntValue();
6672       APInt c2 = N1C->getAPIntValue();
6673       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6674 
6675       EVT InnerShiftVT = N0Op0.getValueType();
6676       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6677       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6678         SDLoc DL(N0);
6679         APInt Sum = c1 + c2;
6680         if (Sum.uge(OpSizeInBits))
6681           return DAG.getConstant(0, DL, VT);
6682 
6683         return DAG.getNode(
6684             ISD::SHL, DL, VT,
6685             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6686             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6687       }
6688     }
6689   }
6690 
6691   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6692   // Only fold this if the inner zext has no other uses to avoid increasing
6693   // the total number of instructions.
6694   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6695       N0.getOperand(0).getOpcode() == ISD::SRL) {
6696     SDValue N0Op0 = N0.getOperand(0);
6697     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6698       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6699         uint64_t c1 = N0Op0C1->getZExtValue();
6700         uint64_t c2 = N1C->getZExtValue();
6701         if (c1 == c2) {
6702           SDValue NewOp0 = N0.getOperand(0);
6703           EVT CountVT = NewOp0.getOperand(1).getValueType();
6704           SDLoc DL(N);
6705           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6706                                        NewOp0,
6707                                        DAG.getConstant(c2, DL, CountVT));
6708           AddToWorklist(NewSHL.getNode());
6709           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6710         }
6711       }
6712     }
6713   }
6714 
6715   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
6716   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
6717   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6718       N0->getFlags().hasExact()) {
6719     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6720       uint64_t C1 = N0C1->getZExtValue();
6721       uint64_t C2 = N1C->getZExtValue();
6722       SDLoc DL(N);
6723       if (C1 <= C2)
6724         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6725                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6726       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6727                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6728     }
6729   }
6730 
6731   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
6732   //                               (and (srl x, (sub c1, c2), MASK)
6733   // Only fold this if the inner shift has no other uses -- if it does, folding
6734   // this will increase the total number of instructions.
6735   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6736       TLI.shouldFoldShiftPairToMask(N, Level)) {
6737     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6738       uint64_t c1 = N0C1->getZExtValue();
6739       if (c1 < OpSizeInBits) {
6740         uint64_t c2 = N1C->getZExtValue();
6741         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6742         SDValue Shift;
6743         if (c2 > c1) {
6744           Mask <<= c2 - c1;
6745           SDLoc DL(N);
6746           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6747                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6748         } else {
6749           Mask.lshrInPlace(c1 - c2);
6750           SDLoc DL(N);
6751           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6752                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6753         }
6754         SDLoc DL(N0);
6755         return DAG.getNode(ISD::AND, DL, VT, Shift,
6756                            DAG.getConstant(Mask, DL, VT));
6757       }
6758     }
6759   }
6760 
6761   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6762   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6763       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6764     SDLoc DL(N);
6765     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6766     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6767     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6768   }
6769 
6770   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6771   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6772   // Variant of version done on multiply, except mul by a power of 2 is turned
6773   // into a shift.
6774   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6775       N0.getNode()->hasOneUse() &&
6776       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6777       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6778       TLI.isDesirableToCommuteWithShift(N, Level)) {
6779     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6780     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6781     AddToWorklist(Shl0.getNode());
6782     AddToWorklist(Shl1.getNode());
6783     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6784   }
6785 
6786   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6787   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6788       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6789       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6790     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6791     if (isConstantOrConstantVector(Shl))
6792       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6793   }
6794 
6795   if (N1C && !N1C->isOpaque())
6796     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6797       return NewSHL;
6798 
6799   return SDValue();
6800 }
6801 
6802 SDValue DAGCombiner::visitSRA(SDNode *N) {
6803   SDValue N0 = N->getOperand(0);
6804   SDValue N1 = N->getOperand(1);
6805   if (SDValue V = DAG.simplifyShift(N0, N1))
6806     return V;
6807 
6808   EVT VT = N0.getValueType();
6809   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6810 
6811   // Arithmetic shifting an all-sign-bit value is a no-op.
6812   // fold (sra 0, x) -> 0
6813   // fold (sra -1, x) -> -1
6814   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6815     return N0;
6816 
6817   // fold vector ops
6818   if (VT.isVector())
6819     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6820       return FoldedVOp;
6821 
6822   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6823 
6824   // fold (sra c1, c2) -> (sra c1, c2)
6825   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6826   if (N0C && N1C && !N1C->isOpaque())
6827     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
6828 
6829   if (SDValue NewSel = foldBinOpIntoSelect(N))
6830     return NewSel;
6831 
6832   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
6833   // sext_inreg.
6834   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
6835     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
6836     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
6837     if (VT.isVector())
6838       ExtVT = EVT::getVectorVT(*DAG.getContext(),
6839                                ExtVT, VT.getVectorNumElements());
6840     if ((!LegalOperations ||
6841          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
6842       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6843                          N0.getOperand(0), DAG.getValueType(ExtVT));
6844   }
6845 
6846   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
6847   // clamp (add c1, c2) to max shift.
6848   if (N0.getOpcode() == ISD::SRA) {
6849     SDLoc DL(N);
6850     EVT ShiftVT = N1.getValueType();
6851     EVT ShiftSVT = ShiftVT.getScalarType();
6852     SmallVector<SDValue, 16> ShiftValues;
6853 
6854     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6855       APInt c1 = LHS->getAPIntValue();
6856       APInt c2 = RHS->getAPIntValue();
6857       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6858       APInt Sum = c1 + c2;
6859       unsigned ShiftSum =
6860           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
6861       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
6862       return true;
6863     };
6864     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
6865       SDValue ShiftValue;
6866       if (VT.isVector())
6867         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
6868       else
6869         ShiftValue = ShiftValues[0];
6870       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
6871     }
6872   }
6873 
6874   // fold (sra (shl X, m), (sub result_size, n))
6875   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
6876   // result_size - n != m.
6877   // If truncate is free for the target sext(shl) is likely to result in better
6878   // code.
6879   if (N0.getOpcode() == ISD::SHL && N1C) {
6880     // Get the two constanst of the shifts, CN0 = m, CN = n.
6881     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
6882     if (N01C) {
6883       LLVMContext &Ctx = *DAG.getContext();
6884       // Determine what the truncate's result bitsize and type would be.
6885       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
6886 
6887       if (VT.isVector())
6888         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
6889 
6890       // Determine the residual right-shift amount.
6891       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
6892 
6893       // If the shift is not a no-op (in which case this should be just a sign
6894       // extend already), the truncated to type is legal, sign_extend is legal
6895       // on that type, and the truncate to that type is both legal and free,
6896       // perform the transform.
6897       if ((ShiftAmt > 0) &&
6898           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
6899           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
6900           TLI.isTruncateFree(VT, TruncVT)) {
6901         SDLoc DL(N);
6902         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
6903             getShiftAmountTy(N0.getOperand(0).getValueType()));
6904         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
6905                                     N0.getOperand(0), Amt);
6906         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
6907                                     Shift);
6908         return DAG.getNode(ISD::SIGN_EXTEND, DL,
6909                            N->getValueType(0), Trunc);
6910       }
6911     }
6912   }
6913 
6914   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
6915   if (N1.getOpcode() == ISD::TRUNCATE &&
6916       N1.getOperand(0).getOpcode() == ISD::AND) {
6917     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6918       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
6919   }
6920 
6921   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
6922   //      if c1 is equal to the number of bits the trunc removes
6923   if (N0.getOpcode() == ISD::TRUNCATE &&
6924       (N0.getOperand(0).getOpcode() == ISD::SRL ||
6925        N0.getOperand(0).getOpcode() == ISD::SRA) &&
6926       N0.getOperand(0).hasOneUse() &&
6927       N0.getOperand(0).getOperand(1).hasOneUse() &&
6928       N1C) {
6929     SDValue N0Op0 = N0.getOperand(0);
6930     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
6931       unsigned LargeShiftVal = LargeShift->getZExtValue();
6932       EVT LargeVT = N0Op0.getValueType();
6933 
6934       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
6935         SDLoc DL(N);
6936         SDValue Amt =
6937           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
6938                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
6939         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
6940                                   N0Op0.getOperand(0), Amt);
6941         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
6942       }
6943     }
6944   }
6945 
6946   // Simplify, based on bits shifted out of the LHS.
6947   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6948     return SDValue(N, 0);
6949 
6950   // If the sign bit is known to be zero, switch this to a SRL.
6951   if (DAG.SignBitIsZero(N0))
6952     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
6953 
6954   if (N1C && !N1C->isOpaque())
6955     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
6956       return NewSRA;
6957 
6958   return SDValue();
6959 }
6960 
6961 SDValue DAGCombiner::visitSRL(SDNode *N) {
6962   SDValue N0 = N->getOperand(0);
6963   SDValue N1 = N->getOperand(1);
6964   if (SDValue V = DAG.simplifyShift(N0, N1))
6965     return V;
6966 
6967   EVT VT = N0.getValueType();
6968   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6969 
6970   // fold vector ops
6971   if (VT.isVector())
6972     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6973       return FoldedVOp;
6974 
6975   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6976 
6977   // fold (srl c1, c2) -> c1 >>u c2
6978   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6979   if (N0C && N1C && !N1C->isOpaque())
6980     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
6981 
6982   if (SDValue NewSel = foldBinOpIntoSelect(N))
6983     return NewSel;
6984 
6985   // if (srl x, c) is known to be zero, return 0
6986   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
6987                                    APInt::getAllOnesValue(OpSizeInBits)))
6988     return DAG.getConstant(0, SDLoc(N), VT);
6989 
6990   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
6991   if (N0.getOpcode() == ISD::SRL) {
6992     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6993                                           ConstantSDNode *RHS) {
6994       APInt c1 = LHS->getAPIntValue();
6995       APInt c2 = RHS->getAPIntValue();
6996       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6997       return (c1 + c2).uge(OpSizeInBits);
6998     };
6999     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7000       return DAG.getConstant(0, SDLoc(N), VT);
7001 
7002     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7003                                        ConstantSDNode *RHS) {
7004       APInt c1 = LHS->getAPIntValue();
7005       APInt c2 = RHS->getAPIntValue();
7006       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7007       return (c1 + c2).ult(OpSizeInBits);
7008     };
7009     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7010       SDLoc DL(N);
7011       EVT ShiftVT = N1.getValueType();
7012       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7013       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7014     }
7015   }
7016 
7017   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7018   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7019       N0.getOperand(0).getOpcode() == ISD::SRL) {
7020     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7021       uint64_t c1 = N001C->getZExtValue();
7022       uint64_t c2 = N1C->getZExtValue();
7023       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7024       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7025       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7026       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7027       if (c1 + OpSizeInBits == InnerShiftSize) {
7028         SDLoc DL(N0);
7029         if (c1 + c2 >= InnerShiftSize)
7030           return DAG.getConstant(0, DL, VT);
7031         return DAG.getNode(ISD::TRUNCATE, DL, VT,
7032                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7033                                        N0.getOperand(0).getOperand(0),
7034                                        DAG.getConstant(c1 + c2, DL,
7035                                                        ShiftCountVT)));
7036       }
7037     }
7038   }
7039 
7040   // fold (srl (shl x, c), c) -> (and x, cst2)
7041   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7042       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7043     SDLoc DL(N);
7044     SDValue Mask =
7045         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7046     AddToWorklist(Mask.getNode());
7047     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7048   }
7049 
7050   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7051   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7052     // Shifting in all undef bits?
7053     EVT SmallVT = N0.getOperand(0).getValueType();
7054     unsigned BitSize = SmallVT.getScalarSizeInBits();
7055     if (N1C->getZExtValue() >= BitSize)
7056       return DAG.getUNDEF(VT);
7057 
7058     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7059       uint64_t ShiftAmt = N1C->getZExtValue();
7060       SDLoc DL0(N0);
7061       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7062                                        N0.getOperand(0),
7063                           DAG.getConstant(ShiftAmt, DL0,
7064                                           getShiftAmountTy(SmallVT)));
7065       AddToWorklist(SmallShift.getNode());
7066       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7067       SDLoc DL(N);
7068       return DAG.getNode(ISD::AND, DL, VT,
7069                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7070                          DAG.getConstant(Mask, DL, VT));
7071     }
7072   }
7073 
7074   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
7075   // bit, which is unmodified by sra.
7076   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
7077     if (N0.getOpcode() == ISD::SRA)
7078       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7079   }
7080 
7081   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
7082   if (N1C && N0.getOpcode() == ISD::CTLZ &&
7083       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7084     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7085 
7086     // If any of the input bits are KnownOne, then the input couldn't be all
7087     // zeros, thus the result of the srl will always be zero.
7088     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7089 
7090     // If all of the bits input the to ctlz node are known to be zero, then
7091     // the result of the ctlz is "32" and the result of the shift is one.
7092     APInt UnknownBits = ~Known.Zero;
7093     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7094 
7095     // Otherwise, check to see if there is exactly one bit input to the ctlz.
7096     if (UnknownBits.isPowerOf2()) {
7097       // Okay, we know that only that the single bit specified by UnknownBits
7098       // could be set on input to the CTLZ node. If this bit is set, the SRL
7099       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7100       // to an SRL/XOR pair, which is likely to simplify more.
7101       unsigned ShAmt = UnknownBits.countTrailingZeros();
7102       SDValue Op = N0.getOperand(0);
7103 
7104       if (ShAmt) {
7105         SDLoc DL(N0);
7106         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7107                   DAG.getConstant(ShAmt, DL,
7108                                   getShiftAmountTy(Op.getValueType())));
7109         AddToWorklist(Op.getNode());
7110       }
7111 
7112       SDLoc DL(N);
7113       return DAG.getNode(ISD::XOR, DL, VT,
7114                          Op, DAG.getConstant(1, DL, VT));
7115     }
7116   }
7117 
7118   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7119   if (N1.getOpcode() == ISD::TRUNCATE &&
7120       N1.getOperand(0).getOpcode() == ISD::AND) {
7121     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7122       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7123   }
7124 
7125   // fold operands of srl based on knowledge that the low bits are not
7126   // demanded.
7127   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7128     return SDValue(N, 0);
7129 
7130   if (N1C && !N1C->isOpaque())
7131     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7132       return NewSRL;
7133 
7134   // Attempt to convert a srl of a load into a narrower zero-extending load.
7135   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7136     return NarrowLoad;
7137 
7138   // Here is a common situation. We want to optimize:
7139   //
7140   //   %a = ...
7141   //   %b = and i32 %a, 2
7142   //   %c = srl i32 %b, 1
7143   //   brcond i32 %c ...
7144   //
7145   // into
7146   //
7147   //   %a = ...
7148   //   %b = and %a, 2
7149   //   %c = setcc eq %b, 0
7150   //   brcond %c ...
7151   //
7152   // However when after the source operand of SRL is optimized into AND, the SRL
7153   // itself may not be optimized further. Look for it and add the BRCOND into
7154   // the worklist.
7155   if (N->hasOneUse()) {
7156     SDNode *Use = *N->use_begin();
7157     if (Use->getOpcode() == ISD::BRCOND)
7158       AddToWorklist(Use);
7159     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7160       // Also look pass the truncate.
7161       Use = *Use->use_begin();
7162       if (Use->getOpcode() == ISD::BRCOND)
7163         AddToWorklist(Use);
7164     }
7165   }
7166 
7167   return SDValue();
7168 }
7169 
7170 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7171   EVT VT = N->getValueType(0);
7172   SDValue N0 = N->getOperand(0);
7173   SDValue N1 = N->getOperand(1);
7174   SDValue N2 = N->getOperand(2);
7175   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7176   unsigned BitWidth = VT.getScalarSizeInBits();
7177 
7178   // fold (fshl N0, N1, 0) -> N0
7179   // fold (fshr N0, N1, 0) -> N1
7180   if (isPowerOf2_32(BitWidth))
7181     if (DAG.MaskedValueIsZero(
7182             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7183       return IsFSHL ? N0 : N1;
7184 
7185   auto IsUndefOrZero = [](SDValue V) {
7186     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7187   };
7188 
7189   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7190     EVT ShAmtTy = N2.getValueType();
7191 
7192     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7193     if (Cst->getAPIntValue().uge(BitWidth)) {
7194       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7195       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7196                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7197     }
7198 
7199     unsigned ShAmt = Cst->getZExtValue();
7200     if (ShAmt == 0)
7201       return IsFSHL ? N0 : N1;
7202 
7203     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7204     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7205     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7206     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7207     if (IsUndefOrZero(N0))
7208       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7209                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7210                                          SDLoc(N), ShAmtTy));
7211     if (IsUndefOrZero(N1))
7212       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7213                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7214                                          SDLoc(N), ShAmtTy));
7215   }
7216 
7217   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7218   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7219   // iff We know the shift amount is in range.
7220   // TODO: when is it worth doing SUB(BW, N2) as well?
7221   if (isPowerOf2_32(BitWidth)) {
7222     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7223     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7224       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7225     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7226       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7227   }
7228 
7229   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7230   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7231   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7232   // is legal as well we might be better off avoiding non-constant (BW - N2).
7233   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7234   if (N0 == N1 && hasOperation(RotOpc, VT))
7235     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7236 
7237   // Simplify, based on bits shifted out of N0/N1.
7238   if (SimplifyDemandedBits(SDValue(N, 0)))
7239     return SDValue(N, 0);
7240 
7241   return SDValue();
7242 }
7243 
7244 SDValue DAGCombiner::visitABS(SDNode *N) {
7245   SDValue N0 = N->getOperand(0);
7246   EVT VT = N->getValueType(0);
7247 
7248   // fold (abs c1) -> c2
7249   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7250     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7251   // fold (abs (abs x)) -> (abs x)
7252   if (N0.getOpcode() == ISD::ABS)
7253     return N0;
7254   // fold (abs x) -> x iff not-negative
7255   if (DAG.SignBitIsZero(N0))
7256     return N0;
7257   return SDValue();
7258 }
7259 
7260 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7261   SDValue N0 = N->getOperand(0);
7262   EVT VT = N->getValueType(0);
7263 
7264   // fold (bswap c1) -> c2
7265   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7266     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7267   // fold (bswap (bswap x)) -> x
7268   if (N0.getOpcode() == ISD::BSWAP)
7269     return N0->getOperand(0);
7270   return SDValue();
7271 }
7272 
7273 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7274   SDValue N0 = N->getOperand(0);
7275   EVT VT = N->getValueType(0);
7276 
7277   // fold (bitreverse c1) -> c2
7278   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7279     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7280   // fold (bitreverse (bitreverse x)) -> x
7281   if (N0.getOpcode() == ISD::BITREVERSE)
7282     return N0.getOperand(0);
7283   return SDValue();
7284 }
7285 
7286 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7287   SDValue N0 = N->getOperand(0);
7288   EVT VT = N->getValueType(0);
7289 
7290   // fold (ctlz c1) -> c2
7291   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7292     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7293 
7294   // If the value is known never to be zero, switch to the undef version.
7295   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7296     if (DAG.isKnownNeverZero(N0))
7297       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7298   }
7299 
7300   return SDValue();
7301 }
7302 
7303 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7304   SDValue N0 = N->getOperand(0);
7305   EVT VT = N->getValueType(0);
7306 
7307   // fold (ctlz_zero_undef c1) -> c2
7308   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7309     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7310   return SDValue();
7311 }
7312 
7313 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7314   SDValue N0 = N->getOperand(0);
7315   EVT VT = N->getValueType(0);
7316 
7317   // fold (cttz c1) -> c2
7318   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7319     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7320 
7321   // If the value is known never to be zero, switch to the undef version.
7322   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7323     if (DAG.isKnownNeverZero(N0))
7324       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7325   }
7326 
7327   return SDValue();
7328 }
7329 
7330 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7331   SDValue N0 = N->getOperand(0);
7332   EVT VT = N->getValueType(0);
7333 
7334   // fold (cttz_zero_undef c1) -> c2
7335   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7336     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7337   return SDValue();
7338 }
7339 
7340 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7341   SDValue N0 = N->getOperand(0);
7342   EVT VT = N->getValueType(0);
7343 
7344   // fold (ctpop c1) -> c2
7345   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7346     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7347   return SDValue();
7348 }
7349 
7350 // FIXME: This should be checking for no signed zeros on individual operands, as
7351 // well as no nans.
7352 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
7353                                          SDValue RHS,
7354                                          const TargetLowering &TLI) {
7355   const TargetOptions &Options = DAG.getTarget().Options;
7356   EVT VT = LHS.getValueType();
7357 
7358   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7359          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
7360          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7361 }
7362 
7363 /// Generate Min/Max node
7364 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7365                                    SDValue RHS, SDValue True, SDValue False,
7366                                    ISD::CondCode CC, const TargetLowering &TLI,
7367                                    SelectionDAG &DAG) {
7368   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7369     return SDValue();
7370 
7371   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7372   switch (CC) {
7373   case ISD::SETOLT:
7374   case ISD::SETOLE:
7375   case ISD::SETLT:
7376   case ISD::SETLE:
7377   case ISD::SETULT:
7378   case ISD::SETULE: {
7379     // Since it's known never nan to get here already, either fminnum or
7380     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
7381     // expanded in terms of it.
7382     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7383     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7384       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7385 
7386     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7387     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7388       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7389     return SDValue();
7390   }
7391   case ISD::SETOGT:
7392   case ISD::SETOGE:
7393   case ISD::SETGT:
7394   case ISD::SETGE:
7395   case ISD::SETUGT:
7396   case ISD::SETUGE: {
7397     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7398     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7399       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7400 
7401     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7402     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7403       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7404     return SDValue();
7405   }
7406   default:
7407     return SDValue();
7408   }
7409 }
7410 
7411 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7412   SDValue Cond = N->getOperand(0);
7413   SDValue N1 = N->getOperand(1);
7414   SDValue N2 = N->getOperand(2);
7415   EVT VT = N->getValueType(0);
7416   EVT CondVT = Cond.getValueType();
7417   SDLoc DL(N);
7418 
7419   if (!VT.isInteger())
7420     return SDValue();
7421 
7422   auto *C1 = dyn_cast<ConstantSDNode>(N1);
7423   auto *C2 = dyn_cast<ConstantSDNode>(N2);
7424   if (!C1 || !C2)
7425     return SDValue();
7426 
7427   // Only do this before legalization to avoid conflicting with target-specific
7428   // transforms in the other direction (create a select from a zext/sext). There
7429   // is also a target-independent combine here in DAGCombiner in the other
7430   // direction for (select Cond, -1, 0) when the condition is not i1.
7431   if (CondVT == MVT::i1 && !LegalOperations) {
7432     if (C1->isNullValue() && C2->isOne()) {
7433       // select Cond, 0, 1 --> zext (!Cond)
7434       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7435       if (VT != MVT::i1)
7436         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7437       return NotCond;
7438     }
7439     if (C1->isNullValue() && C2->isAllOnesValue()) {
7440       // select Cond, 0, -1 --> sext (!Cond)
7441       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7442       if (VT != MVT::i1)
7443         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7444       return NotCond;
7445     }
7446     if (C1->isOne() && C2->isNullValue()) {
7447       // select Cond, 1, 0 --> zext (Cond)
7448       if (VT != MVT::i1)
7449         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7450       return Cond;
7451     }
7452     if (C1->isAllOnesValue() && C2->isNullValue()) {
7453       // select Cond, -1, 0 --> sext (Cond)
7454       if (VT != MVT::i1)
7455         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7456       return Cond;
7457     }
7458 
7459     // For any constants that differ by 1, we can transform the select into an
7460     // extend and add. Use a target hook because some targets may prefer to
7461     // transform in the other direction.
7462     if (TLI.convertSelectOfConstantsToMath(VT)) {
7463       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7464         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7465         if (VT != MVT::i1)
7466           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7467         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7468       }
7469       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7470         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7471         if (VT != MVT::i1)
7472           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7473         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7474       }
7475     }
7476 
7477     return SDValue();
7478   }
7479 
7480   // fold (select Cond, 0, 1) -> (xor Cond, 1)
7481   // We can't do this reliably if integer based booleans have different contents
7482   // to floating point based booleans. This is because we can't tell whether we
7483   // have an integer-based boolean or a floating-point-based boolean unless we
7484   // can find the SETCC that produced it and inspect its operands. This is
7485   // fairly easy if C is the SETCC node, but it can potentially be
7486   // undiscoverable (or not reasonably discoverable). For example, it could be
7487   // in another basic block or it could require searching a complicated
7488   // expression.
7489   if (CondVT.isInteger() &&
7490       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7491           TargetLowering::ZeroOrOneBooleanContent &&
7492       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7493           TargetLowering::ZeroOrOneBooleanContent &&
7494       C1->isNullValue() && C2->isOne()) {
7495     SDValue NotCond =
7496         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7497     if (VT.bitsEq(CondVT))
7498       return NotCond;
7499     return DAG.getZExtOrTrunc(NotCond, DL, VT);
7500   }
7501 
7502   return SDValue();
7503 }
7504 
/// Combine a scalar select node \p N, whose operand 0 is the condition and
/// whose operands 1 and 2 are the values chosen when it is true and false.
///
/// The folds below are attempted in order and the first one that applies
/// supplies the returned replacement. SDValue(N, 0) is returned when
/// SimplifySelectOps reports that it handled the node, and an empty SDValue
/// is returned when no fold applied.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  // The i1-only folds below require the condition and the result to both be
  // i1, so that the condition can be used directly as a logic operand.

  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, DL, VT, N0, N2);

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
  }
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, DL, VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Build the inner select speculatively. If it has uses, it already
      // existed in the DAG (see the block comment above), so the sequence form
      // is used even when the target did not ask for it.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      // Same speculative construction as in the AND case above.
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      // The inner false value must be our false value, and both conditions
      // must have the same type so that they can be ANDed together.
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      // The inner true value must be our true value, and both conditions must
      // have the same type so that they can be ORed together.
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, TLI))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        //
        // The overflow result (value 1) is given the type of the original
        // condition so that it can take its place in the new select.
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    // Merge the setcc into a SELECT_CC node when the target supports it:
    // always if SELECT_CC is legal, and before operation legalization also if
    // it is custom-lowered.
    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
                         N0.getOperand(2));

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}
7677 
7678 static
7679 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7680   SDLoc DL(N);
7681   EVT LoVT, HiVT;
7682   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7683 
7684   // Split the inputs.
7685   SDValue Lo, Hi, LL, LH, RL, RH;
7686   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7687   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7688 
7689   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7690   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7691 
7692   return std::make_pair(Lo, Hi);
7693 }
7694 
7695 // This function assumes all the vselect's arguments are CONCAT_VECTOR
7696 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
7697 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7698   SDLoc DL(N);
7699   SDValue Cond = N->getOperand(0);
7700   SDValue LHS = N->getOperand(1);
7701   SDValue RHS = N->getOperand(2);
7702   EVT VT = N->getValueType(0);
7703   int NumElems = VT.getVectorNumElements();
7704   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7705          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7706          Cond.getOpcode() == ISD::BUILD_VECTOR);
7707 
7708   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
7709   // binary ones here.
7710   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7711     return SDValue();
7712 
7713   // We're sure we have an even number of elements due to the
7714   // concat_vectors we have as arguments to vselect.
7715   // Skip BV elements until we find one that's not an UNDEF
7716   // After we find an UNDEF element, keep looping until we get to half the
7717   // length of the BV and see if all the non-undef nodes are the same.
7718   ConstantSDNode *BottomHalf = nullptr;
7719   for (int i = 0; i < NumElems / 2; ++i) {
7720     if (Cond->getOperand(i)->isUndef())
7721       continue;
7722 
7723     if (BottomHalf == nullptr)
7724       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7725     else if (Cond->getOperand(i).getNode() != BottomHalf)
7726       return SDValue();
7727   }
7728 
7729   // Do the same for the second half of the BuildVector
7730   ConstantSDNode *TopHalf = nullptr;
7731   for (int i = NumElems / 2; i < NumElems; ++i) {
7732     if (Cond->getOperand(i)->isUndef())
7733       continue;
7734 
7735     if (TopHalf == nullptr)
7736       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7737     else if (Cond->getOperand(i).getNode() != TopHalf)
7738       return SDValue();
7739   }
7740 
7741   assert(TopHalf && BottomHalf &&
7742          "One half of the selector was all UNDEFs and the other was all the "
7743          "same value. This should have been addressed before this function.");
7744   return DAG.getNode(
7745       ISD::CONCAT_VECTORS, DL, VT,
7746       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7747       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7748 }
7749 
7750 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7751   if (Level >= AfterLegalizeTypes)
7752     return SDValue();
7753 
7754   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7755   SDValue Mask = MSC->getMask();
7756   SDValue Data  = MSC->getValue();
7757   SDLoc DL(N);
7758 
7759   // If the MSCATTER data type requires splitting and the mask is provided by a
7760   // SETCC, then split both nodes and its operands before legalization. This
7761   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7762   // and enables future optimizations (e.g. min/max pattern matching on X86).
7763   if (Mask.getOpcode() != ISD::SETCC)
7764     return SDValue();
7765 
7766   // Check if any splitting is required.
7767   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7768       TargetLowering::TypeSplitVector)
7769     return SDValue();
7770   SDValue MaskLo, MaskHi;
7771   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7772 
7773   EVT LoVT, HiVT;
7774   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7775 
7776   SDValue Chain = MSC->getChain();
7777 
7778   EVT MemoryVT = MSC->getMemoryVT();
7779   unsigned Alignment = MSC->getOriginalAlignment();
7780 
7781   EVT LoMemVT, HiMemVT;
7782   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7783 
7784   SDValue DataLo, DataHi;
7785   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7786 
7787   SDValue Scale = MSC->getScale();
7788   SDValue BasePtr = MSC->getBasePtr();
7789   SDValue IndexLo, IndexHi;
7790   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7791 
7792   MachineMemOperand *MMO = DAG.getMachineFunction().
7793     getMachineMemOperand(MSC->getPointerInfo(),
7794                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7795                           Alignment, MSC->getAAInfo(), MSC->getRanges());
7796 
7797   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7798   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7799                                     DataLo.getValueType(), DL, OpsLo, MMO);
7800 
7801   // The order of the Scatter operation after split is well defined. The "Hi"
7802   // part comes after the "Lo". So these two operations should be chained one
7803   // after another.
7804   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7805   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7806                               DL, OpsHi, MMO);
7807 }
7808 
7809 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7810   if (Level >= AfterLegalizeTypes)
7811     return SDValue();
7812 
7813   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
7814   SDValue Mask = MST->getMask();
7815   SDValue Data  = MST->getValue();
7816   EVT VT = Data.getValueType();
7817   SDLoc DL(N);
7818 
7819   // If the MSTORE data type requires splitting and the mask is provided by a
7820   // SETCC, then split both nodes and its operands before legalization. This
7821   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7822   // and enables future optimizations (e.g. min/max pattern matching on X86).
7823   if (Mask.getOpcode() == ISD::SETCC) {
7824     // Check if any splitting is required.
7825     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7826         TargetLowering::TypeSplitVector)
7827       return SDValue();
7828 
7829     SDValue MaskLo, MaskHi, Lo, Hi;
7830     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7831 
7832     SDValue Chain = MST->getChain();
7833     SDValue Ptr   = MST->getBasePtr();
7834 
7835     EVT MemoryVT = MST->getMemoryVT();
7836     unsigned Alignment = MST->getOriginalAlignment();
7837 
7838     // if Alignment is equal to the vector size,
7839     // take the half of it for the second part
7840     unsigned SecondHalfAlignment =
7841       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
7842 
7843     EVT LoMemVT, HiMemVT;
7844     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7845 
7846     SDValue DataLo, DataHi;
7847     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7848 
7849     MachineMemOperand *MMO = DAG.getMachineFunction().
7850       getMachineMemOperand(MST->getPointerInfo(),
7851                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
7852                            Alignment, MST->getAAInfo(), MST->getRanges());
7853 
7854     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
7855                             MST->isTruncatingStore(),
7856                             MST->isCompressingStore());
7857 
7858     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
7859                                      MST->isCompressingStore());
7860     unsigned HiOffset = LoMemVT.getStoreSize();
7861 
7862     MMO = DAG.getMachineFunction().getMachineMemOperand(
7863         MST->getPointerInfo().getWithOffset(HiOffset),
7864         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
7865         MST->getAAInfo(), MST->getRanges());
7866 
7867     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
7868                             MST->isTruncatingStore(),
7869                             MST->isCompressingStore());
7870 
7871     AddToWorklist(Lo.getNode());
7872     AddToWorklist(Hi.getNode());
7873 
7874     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
7875   }
7876   return SDValue();
7877 }
7878 
7879 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
7880   if (Level >= AfterLegalizeTypes)
7881     return SDValue();
7882 
7883   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
7884   SDValue Mask = MGT->getMask();
7885   SDLoc DL(N);
7886 
7887   // If the MGATHER result requires splitting and the mask is provided by a
7888   // SETCC, then split both nodes and its operands before legalization. This
7889   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7890   // and enables future optimizations (e.g. min/max pattern matching on X86).
7891 
7892   if (Mask.getOpcode() != ISD::SETCC)
7893     return SDValue();
7894 
7895   EVT VT = N->getValueType(0);
7896 
7897   // Check if any splitting is required.
7898   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7899       TargetLowering::TypeSplitVector)
7900     return SDValue();
7901 
7902   SDValue MaskLo, MaskHi, Lo, Hi;
7903   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7904 
7905   SDValue PassThru = MGT->getPassThru();
7906   SDValue PassThruLo, PassThruHi;
7907   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7908 
7909   EVT LoVT, HiVT;
7910   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
7911 
7912   SDValue Chain = MGT->getChain();
7913   EVT MemoryVT = MGT->getMemoryVT();
7914   unsigned Alignment = MGT->getOriginalAlignment();
7915 
7916   EVT LoMemVT, HiMemVT;
7917   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7918 
7919   SDValue Scale = MGT->getScale();
7920   SDValue BasePtr = MGT->getBasePtr();
7921   SDValue Index = MGT->getIndex();
7922   SDValue IndexLo, IndexHi;
7923   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
7924 
7925   MachineMemOperand *MMO = DAG.getMachineFunction().
7926     getMachineMemOperand(MGT->getPointerInfo(),
7927                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
7928                           Alignment, MGT->getAAInfo(), MGT->getRanges());
7929 
7930   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
7931   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
7932                            MMO);
7933 
7934   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
7935   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
7936                            MMO);
7937 
7938   AddToWorklist(Lo.getNode());
7939   AddToWorklist(Hi.getNode());
7940 
7941   // Build a factor node to remember that this load is independent of the
7942   // other one.
7943   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
7944                       Hi.getValue(1));
7945 
7946   // Legalized the chain result - switch anything that used the old chain to
7947   // use the new one.
7948   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
7949 
7950   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
7951 
7952   SDValue RetOps[] = { GatherRes, Chain };
7953   return DAG.getMergeValues(RetOps, DL);
7954 }
7955 
7956 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
7957   if (Level >= AfterLegalizeTypes)
7958     return SDValue();
7959 
7960   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
7961   SDValue Mask = MLD->getMask();
7962   SDLoc DL(N);
7963 
7964   // If the MLOAD result requires splitting and the mask is provided by a
7965   // SETCC, then split both nodes and its operands before legalization. This
7966   // prevents the type legalizer from unrolling SETCC into scalar comparisons
7967   // and enables future optimizations (e.g. min/max pattern matching on X86).
7968   if (Mask.getOpcode() == ISD::SETCC) {
7969     EVT VT = N->getValueType(0);
7970 
7971     // Check if any splitting is required.
7972     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
7973         TargetLowering::TypeSplitVector)
7974       return SDValue();
7975 
7976     SDValue MaskLo, MaskHi, Lo, Hi;
7977     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7978 
7979     SDValue PassThru = MLD->getPassThru();
7980     SDValue PassThruLo, PassThruHi;
7981     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
7982 
7983     EVT LoVT, HiVT;
7984     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
7985 
7986     SDValue Chain = MLD->getChain();
7987     SDValue Ptr   = MLD->getBasePtr();
7988     EVT MemoryVT = MLD->getMemoryVT();
7989     unsigned Alignment = MLD->getOriginalAlignment();
7990 
7991     // if Alignment is equal to the vector size,
7992     // take the half of it for the second part
7993     unsigned SecondHalfAlignment =
7994       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
7995          Alignment/2 : Alignment;
7996 
7997     EVT LoMemVT, HiMemVT;
7998     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7999 
8000     MachineMemOperand *MMO = DAG.getMachineFunction().
8001     getMachineMemOperand(MLD->getPointerInfo(),
8002                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8003                          Alignment, MLD->getAAInfo(), MLD->getRanges());
8004 
8005     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8006                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8007 
8008     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8009                                      MLD->isExpandingLoad());
8010     unsigned HiOffset = LoMemVT.getStoreSize();
8011 
8012     MMO = DAG.getMachineFunction().getMachineMemOperand(
8013         MLD->getPointerInfo().getWithOffset(HiOffset),
8014         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
8015         MLD->getAAInfo(), MLD->getRanges());
8016 
8017     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8018                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8019 
8020     AddToWorklist(Lo.getNode());
8021     AddToWorklist(Hi.getNode());
8022 
8023     // Build a factor node to remember that this load is independent of the
8024     // other one.
8025     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8026                         Hi.getValue(1));
8027 
8028     // Legalized the chain result - switch anything that used the old chain to
8029     // use the new one.
8030     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8031 
8032     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8033 
8034     SDValue RetOps[] = { LoadRes, Chain };
8035     return DAG.getMergeValues(RetOps, DL);
8036   }
8037   return SDValue();
8038 }
8039 
8040 /// A vector select of 2 constant vectors can be simplified to math/logic to
8041 /// avoid a variable select instruction and possibly avoid constant loads.
8042 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8043   SDValue Cond = N->getOperand(0);
8044   SDValue N1 = N->getOperand(1);
8045   SDValue N2 = N->getOperand(2);
8046   EVT VT = N->getValueType(0);
8047   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8048       !TLI.convertSelectOfConstantsToMath(VT) ||
8049       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8050       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8051     return SDValue();
8052 
8053   // Check if we can use the condition value to increment/decrement a single
8054   // constant value. This simplifies a select to an add and removes a constant
8055   // load/materialization from the general case.
8056   bool AllAddOne = true;
8057   bool AllSubOne = true;
8058   unsigned Elts = VT.getVectorNumElements();
8059   for (unsigned i = 0; i != Elts; ++i) {
8060     SDValue N1Elt = N1.getOperand(i);
8061     SDValue N2Elt = N2.getOperand(i);
8062     if (N1Elt.isUndef() || N2Elt.isUndef())
8063       continue;
8064 
8065     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8066     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8067     if (C1 != C2 + 1)
8068       AllAddOne = false;
8069     if (C1 != C2 - 1)
8070       AllSubOne = false;
8071   }
8072 
8073   // Further simplifications for the extra-special cases where the constants are
8074   // all 0 or all -1 should be implemented as folds of these patterns.
8075   SDLoc DL(N);
8076   if (AllAddOne || AllSubOne) {
8077     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8078     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8079     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8080     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8081     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8082   }
8083 
8084   // The general case for select-of-constants:
8085   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8086   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8087   // leave that to a machine-specific pass.
8088   return SDValue();
8089 }
8090 
8091 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8092   SDValue N0 = N->getOperand(0);
8093   SDValue N1 = N->getOperand(1);
8094   SDValue N2 = N->getOperand(2);
8095   EVT VT = N->getValueType(0);
8096   SDLoc DL(N);
8097 
8098   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8099     return V;
8100 
8101   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8102   if (SDValue F = extractBooleanFlip(N0, TLI))
8103     return DAG.getSelect(DL, VT, F, N2, N1);
8104 
8105   // Canonicalize integer abs.
8106   // vselect (setg[te] X,  0),  X, -X ->
8107   // vselect (setgt    X, -1),  X, -X ->
8108   // vselect (setl[te] X,  0), -X,  X ->
8109   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8110   if (N0.getOpcode() == ISD::SETCC) {
8111     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8112     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8113     bool isAbs = false;
8114     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8115 
8116     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8117          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8118         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8119       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8120     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8121              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8122       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8123 
8124     if (isAbs) {
8125       EVT VT = LHS.getValueType();
8126       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8127         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8128 
8129       SDValue Shift = DAG.getNode(
8130           ISD::SRA, DL, VT, LHS,
8131           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8132       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8133       AddToWorklist(Shift.getNode());
8134       AddToWorklist(Add.getNode());
8135       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8136     }
8137 
8138     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8139     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8140     //
8141     // This is OK if we don't care about what happens if either operand is a
8142     // NaN.
8143     //
8144     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(
8145                               DAG, N0.getOperand(0), N0.getOperand(1), TLI)) {
8146       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8147       if (SDValue FMinMax = combineMinNumMaxNum(
8148             DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8149         return FMinMax;
8150     }
8151 
8152     // If this select has a condition (setcc) with narrower operands than the
8153     // select, try to widen the compare to match the select width.
8154     // TODO: This should be extended to handle any constant.
8155     // TODO: This could be extended to handle non-loading patterns, but that
8156     //       requires thorough testing to avoid regressions.
8157     if (isNullOrNullSplat(RHS)) {
8158       EVT NarrowVT = LHS.getValueType();
8159       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8160       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8161       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8162       unsigned WideWidth = WideVT.getScalarSizeInBits();
8163       bool IsSigned = isSignedIntSetCC(CC);
8164       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8165       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8166           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8167           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8168           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8169         // Both compare operands can be widened for free. The LHS can use an
8170         // extended load, and the RHS is a constant:
8171         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8172         //   vselect (setcc extload(X), C'), N1, N2
8173         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8174         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8175         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8176         EVT WideSetCCVT = getSetCCResultType(WideVT);
8177         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8178         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8179       }
8180     }
8181   }
8182 
8183   if (SimplifySelectOps(N, N1, N2))
8184     return SDValue(N, 0);  // Don't revisit N.
8185 
8186   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8187   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8188     return N1;
8189   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8190   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8191     return N2;
8192 
  // The ConvertSelectToConcatVector function assumes that both of the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
8196   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8197       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8198       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8199     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8200       return CV;
8201   }
8202 
8203   if (SDValue V = foldVSelectOfConstants(N))
8204     return V;
8205 
8206   return SDValue();
8207 }
8208 
8209 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8210   SDValue N0 = N->getOperand(0);
8211   SDValue N1 = N->getOperand(1);
8212   SDValue N2 = N->getOperand(2);
8213   SDValue N3 = N->getOperand(3);
8214   SDValue N4 = N->getOperand(4);
8215   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8216 
8217   // fold select_cc lhs, rhs, x, x, cc -> x
8218   if (N2 == N3)
8219     return N2;
8220 
8221   // Determine if the condition we're dealing with is constant
8222   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8223                                   CC, SDLoc(N), false)) {
8224     AddToWorklist(SCC.getNode());
8225 
8226     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8227       if (!SCCC->isNullValue())
8228         return N2;    // cond always true -> true val
8229       else
8230         return N3;    // cond always false -> false val
8231     } else if (SCC->isUndef()) {
8232       // When the condition is UNDEF, just return the first operand. This is
8233       // coherent the DAG creation, no setcc node is created in this case
8234       return N2;
8235     } else if (SCC.getOpcode() == ISD::SETCC) {
8236       // Fold to a simpler select_cc
8237       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8238                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8239                          SCC.getOperand(2));
8240     }
8241   }
8242 
8243   // If we can fold this based on the true/false value, do so.
8244   if (SimplifySelectOps(N, N2, N3))
8245     return SDValue(N, 0);  // Don't revisit N.
8246 
8247   // fold select_cc into other things, such as min/max/abs
8248   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8249 }
8250 
8251 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8252   // setcc is very commonly used as an argument to brcond. This pattern
8253   // also lend itself to numerous combines and, as a result, it is desired
8254   // we keep the argument to a brcond as a setcc as much as possible.
8255   bool PreferSetCC =
8256       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8257 
8258   SDValue Combined = SimplifySetCC(
8259       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8260       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8261 
8262   if (!Combined)
8263     return SDValue();
8264 
8265   // If we prefer to have a setcc, and we don't, we'll try our best to
8266   // recreate one using rebuildSetCC.
8267   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8268     SDValue NewSetCC = rebuildSetCC(Combined);
8269 
8270     // We don't have anything interesting to combine to.
8271     if (NewSetCC.getNode() == N)
8272       return SDValue();
8273 
8274     if (NewSetCC)
8275       return NewSetCC;
8276   }
8277 
8278   return Combined;
8279 }
8280 
8281 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8282   SDValue LHS = N->getOperand(0);
8283   SDValue RHS = N->getOperand(1);
8284   SDValue Carry = N->getOperand(2);
8285   SDValue Cond = N->getOperand(3);
8286 
8287   // If Carry is false, fold to a regular SETCC.
8288   if (isNullConstant(Carry))
8289     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8290 
8291   return SDValue();
8292 }
8293 
8294 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8295 /// a build_vector of constants.
8296 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8297 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8298 /// Vector extends are not folded if operations are legal; this is to
8299 /// avoid introducing illegal build_vector dag nodes.
8300 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8301                                          SelectionDAG &DAG, bool LegalTypes) {
8302   unsigned Opcode = N->getOpcode();
8303   SDValue N0 = N->getOperand(0);
8304   EVT VT = N->getValueType(0);
8305 
8306   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8307          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8308          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8309          && "Expected EXTEND dag node in input!");
8310 
8311   // fold (sext c1) -> c1
8312   // fold (zext c1) -> c1
8313   // fold (aext c1) -> c1
8314   if (isa<ConstantSDNode>(N0))
8315     return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8316 
8317   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8318   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8319   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8320   EVT SVT = VT.getScalarType();
8321   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8322       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8323     return SDValue();
8324 
8325   // We can fold this node into a build_vector.
8326   unsigned VTBits = SVT.getSizeInBits();
8327   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8328   SmallVector<SDValue, 8> Elts;
8329   unsigned NumElts = VT.getVectorNumElements();
8330   SDLoc DL(N);
8331 
8332   // For zero-extensions, UNDEF elements still guarantee to have the upper
8333   // bits set to zero.
8334   bool IsZext =
8335       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8336 
8337   for (unsigned i = 0; i != NumElts; ++i) {
8338     SDValue Op = N0.getOperand(i);
8339     if (Op.isUndef()) {
8340       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8341       continue;
8342     }
8343 
8344     SDLoc DL(Op);
8345     // Get the constant value and if needed trunc it to the size of the type.
8346     // Nodes like build_vector might have constants wider than the scalar type.
8347     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8348     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8349       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8350     else
8351       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8352   }
8353 
8354   return DAG.getBuildVector(VT, DL, Elts);
8355 }
8356 
8357 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8358 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8359 // transformation. Returns true if extension are possible and the above
8360 // mentioned transformation is profitable.
8361 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8362                                     unsigned ExtOpc,
8363                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8364                                     const TargetLowering &TLI) {
8365   bool HasCopyToRegUses = false;
8366   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8367   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8368                             UE = N0.getNode()->use_end();
8369        UI != UE; ++UI) {
8370     SDNode *User = *UI;
8371     if (User == N)
8372       continue;
8373     if (UI.getUse().getResNo() != N0.getResNo())
8374       continue;
8375     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8376     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8377       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8378       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8379         // Sign bits will be lost after a zext.
8380         return false;
8381       bool Add = false;
8382       for (unsigned i = 0; i != 2; ++i) {
8383         SDValue UseOp = User->getOperand(i);
8384         if (UseOp == N0)
8385           continue;
8386         if (!isa<ConstantSDNode>(UseOp))
8387           return false;
8388         Add = true;
8389       }
8390       if (Add)
8391         ExtendNodes.push_back(User);
8392       continue;
8393     }
8394     // If truncates aren't free and there are users we can't
8395     // extend, it isn't worthwhile.
8396     if (!isTruncFree)
8397       return false;
8398     // Remember if this value is live-out.
8399     if (User->getOpcode() == ISD::CopyToReg)
8400       HasCopyToRegUses = true;
8401   }
8402 
8403   if (HasCopyToRegUses) {
8404     bool BothLiveOut = false;
8405     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8406          UI != UE; ++UI) {
8407       SDUse &Use = UI.getUse();
8408       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8409         BothLiveOut = true;
8410         break;
8411       }
8412     }
8413     if (BothLiveOut)
8414       // Both unextended and extended values are live out. There had better be
8415       // a good reason for the transformation.
8416       return ExtendNodes.size();
8417   }
8418   return true;
8419 }
8420 
8421 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8422                                   SDValue OrigLoad, SDValue ExtLoad,
8423                                   ISD::NodeType ExtType) {
8424   // Extend SetCC uses if necessary.
8425   SDLoc DL(ExtLoad);
8426   for (SDNode *SetCC : SetCCs) {
8427     SmallVector<SDValue, 4> Ops;
8428 
8429     for (unsigned j = 0; j != 2; ++j) {
8430       SDValue SOp = SetCC->getOperand(j);
8431       if (SOp == OrigLoad)
8432         Ops.push_back(ExtLoad);
8433       else
8434         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8435     }
8436 
8437     Ops.push_back(SetCC->getOperand(2));
8438     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8439   }
8440 }
8441 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
//
// Split a sign/zero extend of a vector load into several narrower extending
// loads whose results are concatenated. N must be a SIGN_EXTEND or
// ZERO_EXTEND node (asserted). Returns SDValue(N, 0) after rewriting the DAG,
// or an empty SDValue when the pattern does not apply.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Require a plain (non-extending), unindexed, non-volatile load whose value
  // has a single use, a power-of-2 vector result type, and target approval.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collect SETCC users of the load that would have to be widened; bail out
  // if the helper reports the transformation is not possible or profitable.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Source and destination types are split in lock-step until the extending
  // load is legal or custom, or the source is down to a single element.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  // Splitting all the way down still did not yield a usable extending load.
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  // Number of partial loads needed, and the byte distance between the memory
  // each of them reads (the store size of one split source vector).
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of this piece, derived from the original load's alignment and
    // the piece's byte offset.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    // Every partial load hangs off the original load's input chain and keeps
    // its memory-operand flags and AA info.
    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    // Advance the pointer for the next piece.
    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Merge the output chains of all pieces and glue their values back into a
  // single vector of the destination type.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  // The original load's value becomes the truncate and its chain result
  // becomes the merged chain.
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
8538 
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
//
// N must be a ZERO_EXTEND node (asserted). Returns SDValue(N, 0) after
// rewriting the DAG, or an empty SDValue when the pattern does not match.
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // Nothing to do when the target reports this zero-extension as free.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  // The logic op needs a constant right-hand operand and, once operations
  // have been legalized, must be legal in the wide type.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // Same requirements for the shift: constant amount, legal in the wide type.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be an unindexed load that is not sign-extending,
  // and the target must support a zero-extending load from its memory type
  // to VT.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // Both the logic op and the shift are rebuilt in the wide type below, so
  // they must have no other users.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Collect SETCC users of the load that need widening; bail out if the
  // helper reports the transformation is not possible or profitable.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Redo the shift in the wide type, reusing the original shift-amount
  // operand unchanged.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic-op constant to the wide type and redo the logic op
  // (the variable is named And, but the opcode may be and/or/xor).
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  // If the loaded value has a single use, only the chain result needs to be
  // redirected to the extending load. Otherwise the remaining users of the
  // value get a truncate of the extended load.
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
8616 
8617 /// If we're narrowing or widening the result of a vector select and the final
8618 /// size is the same size as a setcc (compare) feeding the select, then try to
8619 /// apply the cast operation to the select's operands because matching vector
8620 /// sizes for a select condition and other operands should be more efficient.
8621 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8622   unsigned CastOpcode = Cast->getOpcode();
8623   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8624           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8625           CastOpcode == ISD::FP_ROUND) &&
8626          "Unexpected opcode for vector select narrowing/widening");
8627 
8628   // We only do this transform before legal ops because the pattern may be
8629   // obfuscated by target-specific operations after legalization. Do not create
8630   // an illegal select op, however, because that may be difficult to lower.
8631   EVT VT = Cast->getValueType(0);
8632   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8633     return SDValue();
8634 
8635   SDValue VSel = Cast->getOperand(0);
8636   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8637       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8638     return SDValue();
8639 
8640   // Does the setcc have the same vector size as the casted select?
8641   SDValue SetCC = VSel.getOperand(0);
8642   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8643   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8644     return SDValue();
8645 
8646   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8647   SDValue A = VSel.getOperand(1);
8648   SDValue B = VSel.getOperand(2);
8649   SDValue CastA, CastB;
8650   SDLoc DL(Cast);
8651   if (CastOpcode == ISD::FP_ROUND) {
8652     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8653     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8654     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8655   } else {
8656     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8657     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8658   }
8659   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8660 }
8661 
8662 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8663 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8664 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8665                                      const TargetLowering &TLI, EVT VT,
8666                                      bool LegalOperations, SDNode *N,
8667                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8668   SDNode *N0Node = N0.getNode();
8669   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8670                                                    : ISD::isZEXTLoad(N0Node);
8671   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8672       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8673     return SDValue();
8674 
8675   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8676   EVT MemVT = LN0->getMemoryVT();
8677   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8678       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8679     return SDValue();
8680 
8681   SDValue ExtLoad =
8682       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8683                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8684   Combiner.CombineTo(N, ExtLoad);
8685   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8686   if (LN0->use_empty())
8687     Combiner.recursivelyDeleteUnusedNodes(LN0);
8688   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8689 }
8690 
8691 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8692 // Only generate vector extloads when 1) they're legal, and 2) they are
8693 // deemed desirable by the target.
8694 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8695                                   const TargetLowering &TLI, EVT VT,
8696                                   bool LegalOperations, SDNode *N, SDValue N0,
8697                                   ISD::LoadExtType ExtLoadType,
8698                                   ISD::NodeType ExtOpc) {
8699   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8700       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8701       ((LegalOperations || VT.isVector() ||
8702         cast<LoadSDNode>(N0)->isVolatile()) &&
8703        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8704     return {};
8705 
8706   bool DoXform = true;
8707   SmallVector<SDNode *, 4> SetCCs;
8708   if (!N0.hasOneUse())
8709     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8710   if (VT.isVector())
8711     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8712   if (!DoXform)
8713     return {};
8714 
8715   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8716   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8717                                    LN0->getBasePtr(), N0.getValueType(),
8718                                    LN0->getMemOperand());
8719   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8720   // If the load value is used only by N, replace it via CombineTo N.
8721   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8722   Combiner.CombineTo(N, ExtLoad);
8723   if (NoReplaceTrunc) {
8724     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8725     Combiner.recursivelyDeleteUnusedNodes(LN0);
8726   } else {
8727     SDValue Trunc =
8728         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8729     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8730   }
8731   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8732 }
8733 
8734 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8735                                        bool LegalOperations) {
8736   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8737           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8738 
8739   SDValue SetCC = N->getOperand(0);
8740   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8741       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8742     return SDValue();
8743 
8744   SDValue X = SetCC.getOperand(0);
8745   SDValue Ones = SetCC.getOperand(1);
8746   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8747   EVT VT = N->getValueType(0);
8748   EVT XVT = X.getValueType();
8749   // setge X, C is canonicalized to setgt, so we do not need to match that
8750   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8751   // not require the 'not' op.
8752   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8753     // Invert and smear/shift the sign bit:
8754     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8755     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
8756     SDLoc DL(N);
8757     SDValue NotX = DAG.getNOT(DL, X, VT);
8758     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8759     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8760     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8761   }
8762   return SDValue();
8763 }
8764 
8765 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8766   SDValue N0 = N->getOperand(0);
8767   EVT VT = N->getValueType(0);
8768   SDLoc DL(N);
8769 
8770   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8771     return Res;
8772 
8773   // fold (sext (sext x)) -> (sext x)
8774   // fold (sext (aext x)) -> (sext x)
8775   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8776     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8777 
8778   if (N0.getOpcode() == ISD::TRUNCATE) {
8779     // fold (sext (truncate (load x))) -> (sext (smaller load x))
8780     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8781     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8782       SDNode *oye = N0.getOperand(0).getNode();
8783       if (NarrowLoad.getNode() != N0.getNode()) {
8784         CombineTo(N0.getNode(), NarrowLoad);
8785         // CombineTo deleted the truncate, if needed, but not what's under it.
8786         AddToWorklist(oye);
8787       }
8788       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
8789     }
8790 
8791     // See if the value being truncated is already sign extended.  If so, just
8792     // eliminate the trunc/sext pair.
8793     SDValue Op = N0.getOperand(0);
8794     unsigned OpBits   = Op.getScalarValueSizeInBits();
8795     unsigned MidBits  = N0.getScalarValueSizeInBits();
8796     unsigned DestBits = VT.getScalarSizeInBits();
8797     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8798 
8799     if (OpBits == DestBits) {
8800       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
8801       // bits, it is already ready.
8802       if (NumSignBits > DestBits-MidBits)
8803         return Op;
8804     } else if (OpBits < DestBits) {
8805       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
8806       // bits, just sext from i32.
8807       if (NumSignBits > OpBits-MidBits)
8808         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8809     } else {
8810       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
8811       // bits, just truncate to i32.
8812       if (NumSignBits > OpBits-MidBits)
8813         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8814     }
8815 
8816     // fold (sext (truncate x)) -> (sextinreg x).
8817     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8818                                                  N0.getValueType())) {
8819       if (OpBits < DestBits)
8820         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8821       else if (OpBits > DestBits)
8822         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8823       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8824                          DAG.getValueType(N0.getValueType()));
8825     }
8826   }
8827 
8828   // Try to simplify (sext (load x)).
8829   if (SDValue foldedExt =
8830           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
8831                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
8832     return foldedExt;
8833 
8834   // fold (sext (load x)) to multiple smaller sextloads.
8835   // Only on illegal but splittable vectors.
8836   if (SDValue ExtLoad = CombineExtLoad(N))
8837     return ExtLoad;
8838 
8839   // Try to simplify (sext (sextload x)).
8840   if (SDValue foldedExt = tryToFoldExtOfExtload(
8841           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
8842     return foldedExt;
8843 
8844   // fold (sext (and/or/xor (load x), cst)) ->
8845   //      (and/or/xor (sextload x), (sext cst))
8846   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8847        N0.getOpcode() == ISD::XOR) &&
8848       isa<LoadSDNode>(N0.getOperand(0)) &&
8849       N0.getOperand(1).getOpcode() == ISD::Constant &&
8850       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
8851     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
8852     EVT MemVT = LN00->getMemoryVT();
8853     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
8854       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
8855       SmallVector<SDNode*, 4> SetCCs;
8856       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
8857                                              ISD::SIGN_EXTEND, SetCCs, TLI);
8858       if (DoXform) {
8859         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
8860                                          LN00->getChain(), LN00->getBasePtr(),
8861                                          LN00->getMemoryVT(),
8862                                          LN00->getMemOperand());
8863         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8864         Mask = Mask.sext(VT.getSizeInBits());
8865         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
8866                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
8867         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
8868         bool NoReplaceTruncAnd = !N0.hasOneUse();
8869         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
8870         CombineTo(N, And);
8871         // If N0 has multiple uses, change other uses as well.
8872         if (NoReplaceTruncAnd) {
8873           SDValue TruncAnd =
8874               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
8875           CombineTo(N0.getNode(), TruncAnd);
8876         }
8877         if (NoReplaceTrunc) {
8878           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
8879         } else {
8880           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
8881                                       LN00->getValueType(0), ExtLoad);
8882           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
8883         }
8884         return SDValue(N,0); // Return N so it doesn't get rechecked!
8885       }
8886     }
8887   }
8888 
8889   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
8890     return V;
8891 
8892   if (N0.getOpcode() == ISD::SETCC) {
8893     SDValue N00 = N0.getOperand(0);
8894     SDValue N01 = N0.getOperand(1);
8895     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8896     EVT N00VT = N0.getOperand(0).getValueType();
8897 
8898     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
8899     // Only do this before legalize for now.
8900     if (VT.isVector() && !LegalOperations &&
8901         TLI.getBooleanContents(N00VT) ==
8902             TargetLowering::ZeroOrNegativeOneBooleanContent) {
8903       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
8904       // of the same size as the compared operands. Only optimize sext(setcc())
8905       // if this is the case.
8906       EVT SVT = getSetCCResultType(N00VT);
8907 
8908       // If we already have the desired type, don't change it.
8909       if (SVT != N0.getValueType()) {
8910         // We know that the # elements of the results is the same as the
8911         // # elements of the compare (and the # elements of the compare result
8912         // for that matter).  Check to see that they are the same size.  If so,
8913         // we know that the element size of the sext'd result matches the
8914         // element size of the compare operands.
8915         if (VT.getSizeInBits() == SVT.getSizeInBits())
8916           return DAG.getSetCC(DL, VT, N00, N01, CC);
8917 
8918         // If the desired elements are smaller or larger than the source
8919         // elements, we can use a matching integer vector type and then
8920         // truncate/sign extend.
8921         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
8922         if (SVT == MatchingVecType) {
8923           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
8924           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
8925         }
8926       }
8927     }
8928 
8929     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
8930     // Here, T can be 1 or -1, depending on the type of the setcc and
8931     // getBooleanContents().
8932     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
8933 
8934     // To determine the "true" side of the select, we need to know the high bit
8935     // of the value returned by the setcc if it evaluates to true.
8936     // If the type of the setcc is i1, then the true case of the select is just
8937     // sext(i1 1), that is, -1.
8938     // If the type of the setcc is larger (say, i8) then the value of the high
8939     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
8940     // of the appropriate width.
8941     SDValue ExtTrueVal = (SetCCWidth == 1)
8942                              ? DAG.getAllOnesConstant(DL, VT)
8943                              : DAG.getBoolConstant(true, DL, VT, N00VT);
8944     SDValue Zero = DAG.getConstant(0, DL, VT);
8945     if (SDValue SCC =
8946             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
8947       return SCC;
8948 
8949     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
8950       EVT SetCCVT = getSetCCResultType(N00VT);
8951       // Don't do this transform for i1 because there's a select transform
8952       // that would reverse it.
8953       // TODO: We should not do this transform at all without a target hook
8954       // because a sext is likely cheaper than a select?
8955       if (SetCCVT.getScalarSizeInBits() != 1 &&
8956           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
8957         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
8958         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
8959       }
8960     }
8961   }
8962 
8963   // fold (sext x) -> (zext x) if the sign bit is known zero.
8964   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
8965       DAG.SignBitIsZero(N0))
8966     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
8967 
8968   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
8969     return NewVSel;
8970 
8971   return SDValue();
8972 }
8973 
8974 // isTruncateOf - If N is a truncate of some other value, return true, record
8975 // the value being truncated in Op and which of Op's bits are zero/one in Known.
8976 // This function computes KnownBits to avoid a duplicated call to
8977 // computeKnownBits in the caller.
8978 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
8979                          KnownBits &Known) {
8980   if (N->getOpcode() == ISD::TRUNCATE) {
8981     Op = N->getOperand(0);
8982     Known = DAG.computeKnownBits(Op);
8983     return true;
8984   }
8985 
8986   if (N.getOpcode() != ISD::SETCC ||
8987       N.getValueType().getScalarType() != MVT::i1 ||
8988       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
8989     return false;
8990 
8991   SDValue Op0 = N->getOperand(0);
8992   SDValue Op1 = N->getOperand(1);
8993   assert(Op0.getValueType() == Op1.getValueType());
8994 
8995   if (isNullOrNullSplat(Op0))
8996     Op = Op1;
8997   else if (isNullOrNullSplat(Op1))
8998     Op = Op0;
8999   else
9000     return false;
9001 
9002   Known = DAG.computeKnownBits(Op);
9003 
9004   return (Known.Zero | 1).isAllOnesValue();
9005 }
9006 
9007 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9008   SDValue N0 = N->getOperand(0);
9009   EVT VT = N->getValueType(0);
9010 
9011   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9012     return Res;
9013 
9014   // fold (zext (zext x)) -> (zext x)
9015   // fold (zext (aext x)) -> (zext x)
9016   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9017     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9018                        N0.getOperand(0));
9019 
9020   // fold (zext (truncate x)) -> (zext x) or
9021   //      (zext (truncate x)) -> (truncate x)
9022   // This is valid when the truncated bits of x are already zero.
9023   SDValue Op;
9024   KnownBits Known;
9025   if (isTruncateOf(DAG, N0, Op, Known)) {
9026     APInt TruncatedBits =
9027       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9028       APInt(Op.getScalarValueSizeInBits(), 0) :
9029       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9030                         N0.getScalarValueSizeInBits(),
9031                         std::min(Op.getScalarValueSizeInBits(),
9032                                  VT.getScalarSizeInBits()));
9033     if (TruncatedBits.isSubsetOf(Known.Zero))
9034       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9035   }
9036 
9037   // fold (zext (truncate x)) -> (and x, mask)
9038   if (N0.getOpcode() == ISD::TRUNCATE) {
9039     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9040     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9041     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9042       SDNode *oye = N0.getOperand(0).getNode();
9043       if (NarrowLoad.getNode() != N0.getNode()) {
9044         CombineTo(N0.getNode(), NarrowLoad);
9045         // CombineTo deleted the truncate, if needed, but not what's under it.
9046         AddToWorklist(oye);
9047       }
9048       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9049     }
9050 
9051     EVT SrcVT = N0.getOperand(0).getValueType();
9052     EVT MinVT = N0.getValueType();
9053 
9054     // Try to mask before the extension to avoid having to generate a larger mask,
9055     // possibly over several sub-vectors.
9056     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9057       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9058                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9059         SDValue Op = N0.getOperand(0);
9060         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9061         AddToWorklist(Op.getNode());
9062         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9063         // Transfer the debug info; the new node is equivalent to N0.
9064         DAG.transferDbgValues(N0, ZExtOrTrunc);
9065         return ZExtOrTrunc;
9066       }
9067     }
9068 
9069     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9070       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9071       AddToWorklist(Op.getNode());
9072       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9073       // We may safely transfer the debug info describing the truncate node over
9074       // to the equivalent and operation.
9075       DAG.transferDbgValues(N0, And);
9076       return And;
9077     }
9078   }
9079 
9080   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9081   // if either of the casts is not free.
9082   if (N0.getOpcode() == ISD::AND &&
9083       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9084       N0.getOperand(1).getOpcode() == ISD::Constant &&
9085       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9086                            N0.getValueType()) ||
9087        !TLI.isZExtFree(N0.getValueType(), VT))) {
9088     SDValue X = N0.getOperand(0).getOperand(0);
9089     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9090     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9091     Mask = Mask.zext(VT.getSizeInBits());
9092     SDLoc DL(N);
9093     return DAG.getNode(ISD::AND, DL, VT,
9094                        X, DAG.getConstant(Mask, DL, VT));
9095   }
9096 
9097   // Try to simplify (zext (load x)).
9098   if (SDValue foldedExt =
9099           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9100                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9101     return foldedExt;
9102 
9103   // fold (zext (load x)) to multiple smaller zextloads.
9104   // Only on illegal but splittable vectors.
9105   if (SDValue ExtLoad = CombineExtLoad(N))
9106     return ExtLoad;
9107 
9108   // fold (zext (and/or/xor (load x), cst)) ->
9109   //      (and/or/xor (zextload x), (zext cst))
9110   // Unless (and (load x) cst) will match as a zextload already and has
9111   // additional users.
9112   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9113        N0.getOpcode() == ISD::XOR) &&
9114       isa<LoadSDNode>(N0.getOperand(0)) &&
9115       N0.getOperand(1).getOpcode() == ISD::Constant &&
9116       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9117     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9118     EVT MemVT = LN00->getMemoryVT();
9119     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9120         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9121       bool DoXform = true;
9122       SmallVector<SDNode*, 4> SetCCs;
9123       if (!N0.hasOneUse()) {
9124         if (N0.getOpcode() == ISD::AND) {
9125           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9126           EVT LoadResultTy = AndC->getValueType(0);
9127           EVT ExtVT;
9128           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9129             DoXform = false;
9130         }
9131       }
9132       if (DoXform)
9133         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9134                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9135       if (DoXform) {
9136         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9137                                          LN00->getChain(), LN00->getBasePtr(),
9138                                          LN00->getMemoryVT(),
9139                                          LN00->getMemOperand());
9140         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9141         Mask = Mask.zext(VT.getSizeInBits());
9142         SDLoc DL(N);
9143         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9144                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9145         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9146         bool NoReplaceTruncAnd = !N0.hasOneUse();
9147         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9148         CombineTo(N, And);
9149         // If N0 has multiple uses, change other uses as well.
9150         if (NoReplaceTruncAnd) {
9151           SDValue TruncAnd =
9152               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9153           CombineTo(N0.getNode(), TruncAnd);
9154         }
9155         if (NoReplaceTrunc) {
9156           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9157         } else {
9158           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9159                                       LN00->getValueType(0), ExtLoad);
9160           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9161         }
9162         return SDValue(N,0); // Return N so it doesn't get rechecked!
9163       }
9164     }
9165   }
9166 
9167   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9168   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9169   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9170     return ZExtLoad;
9171 
9172   // Try to simplify (zext (zextload x)).
9173   if (SDValue foldedExt = tryToFoldExtOfExtload(
9174           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9175     return foldedExt;
9176 
9177   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9178     return V;
9179 
9180   if (N0.getOpcode() == ISD::SETCC) {
9181     // Only do this before legalize for now.
9182     if (!LegalOperations && VT.isVector() &&
9183         N0.getValueType().getVectorElementType() == MVT::i1) {
9184       EVT N00VT = N0.getOperand(0).getValueType();
9185       if (getSetCCResultType(N00VT) == N0.getValueType())
9186         return SDValue();
9187 
9188       // We know that the # elements of the results is the same as the #
9189       // elements of the compare (and the # elements of the compare result for
9190       // that matter). Check to see that they are the same size. If so, we know
9191       // that the element size of the sext'd result matches the element size of
9192       // the compare operands.
9193       SDLoc DL(N);
9194       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9195       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9196         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9197         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9198                                      N0.getOperand(1), N0.getOperand(2));
9199         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9200       }
9201 
9202       // If the desired elements are smaller or larger than the source
9203       // elements we can use a matching integer vector type and then
9204       // truncate/sign extend.
9205       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9206       SDValue VsetCC =
9207           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9208                       N0.getOperand(1), N0.getOperand(2));
9209       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9210                          VecOnes);
9211     }
9212 
9213     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9214     SDLoc DL(N);
9215     if (SDValue SCC = SimplifySelectCC(
9216             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9217             DAG.getConstant(0, DL, VT),
9218             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9219       return SCC;
9220   }
9221 
9222   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9223   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9224       isa<ConstantSDNode>(N0.getOperand(1)) &&
9225       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9226       N0.hasOneUse()) {
9227     SDValue ShAmt = N0.getOperand(1);
9228     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9229     if (N0.getOpcode() == ISD::SHL) {
9230       SDValue InnerZExt = N0.getOperand(0);
9231       // If the original shl may be shifting out bits, do not perform this
9232       // transformation.
9233       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9234         InnerZExt.getOperand(0).getValueSizeInBits();
9235       if (ShAmtVal > KnownZeroBits)
9236         return SDValue();
9237     }
9238 
9239     SDLoc DL(N);
9240 
9241     // Ensure that the shift amount is wide enough for the shifted value.
9242     if (VT.getSizeInBits() >= 256)
9243       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9244 
9245     return DAG.getNode(N0.getOpcode(), DL, VT,
9246                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9247                        ShAmt);
9248   }
9249 
9250   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9251     return NewVSel;
9252 
9253   return SDValue();
9254 }
9255 
9256 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9257   SDValue N0 = N->getOperand(0);
9258   EVT VT = N->getValueType(0);
9259 
9260   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9261     return Res;
9262 
9263   // fold (aext (aext x)) -> (aext x)
9264   // fold (aext (zext x)) -> (zext x)
9265   // fold (aext (sext x)) -> (sext x)
9266   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9267       N0.getOpcode() == ISD::ZERO_EXTEND ||
9268       N0.getOpcode() == ISD::SIGN_EXTEND)
9269     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9270 
9271   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9272   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9273   if (N0.getOpcode() == ISD::TRUNCATE) {
9274     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9275       SDNode *oye = N0.getOperand(0).getNode();
9276       if (NarrowLoad.getNode() != N0.getNode()) {
9277         CombineTo(N0.getNode(), NarrowLoad);
9278         // CombineTo deleted the truncate, if needed, but not what's under it.
9279         AddToWorklist(oye);
9280       }
9281       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9282     }
9283   }
9284 
9285   // fold (aext (truncate x))
9286   if (N0.getOpcode() == ISD::TRUNCATE)
9287     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9288 
9289   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9290   // if the trunc is not free.
9291   if (N0.getOpcode() == ISD::AND &&
9292       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9293       N0.getOperand(1).getOpcode() == ISD::Constant &&
9294       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9295                           N0.getValueType())) {
9296     SDLoc DL(N);
9297     SDValue X = N0.getOperand(0).getOperand(0);
9298     X = DAG.getAnyExtOrTrunc(X, DL, VT);
9299     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9300     Mask = Mask.zext(VT.getSizeInBits());
9301     return DAG.getNode(ISD::AND, DL, VT,
9302                        X, DAG.getConstant(Mask, DL, VT));
9303   }
9304 
9305   // fold (aext (load x)) -> (aext (truncate (extload x)))
9306   // None of the supported targets knows how to perform load and any_ext
9307   // on vectors in one instruction.  We only perform this transformation on
9308   // scalars.
9309   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9310       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9311       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9312     bool DoXform = true;
9313     SmallVector<SDNode*, 4> SetCCs;
9314     if (!N0.hasOneUse())
9315       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9316                                         TLI);
9317     if (DoXform) {
9318       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9319       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9320                                        LN0->getChain(),
9321                                        LN0->getBasePtr(), N0.getValueType(),
9322                                        LN0->getMemOperand());
9323       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9324       // If the load value is used only by N, replace it via CombineTo N.
9325       bool NoReplaceTrunc = N0.hasOneUse();
9326       CombineTo(N, ExtLoad);
9327       if (NoReplaceTrunc) {
9328         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9329         recursivelyDeleteUnusedNodes(LN0);
9330       } else {
9331         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9332                                     N0.getValueType(), ExtLoad);
9333         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9334       }
9335       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9336     }
9337   }
9338 
9339   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9340   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9341   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
9342   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9343       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9344     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9345     ISD::LoadExtType ExtType = LN0->getExtensionType();
9346     EVT MemVT = LN0->getMemoryVT();
9347     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9348       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9349                                        VT, LN0->getChain(), LN0->getBasePtr(),
9350                                        MemVT, LN0->getMemOperand());
9351       CombineTo(N, ExtLoad);
9352       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9353       recursivelyDeleteUnusedNodes(LN0);
9354       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9355     }
9356   }
9357 
9358   if (N0.getOpcode() == ISD::SETCC) {
9359     // For vectors:
9360     // aext(setcc) -> vsetcc
9361     // aext(setcc) -> truncate(vsetcc)
9362     // aext(setcc) -> aext(vsetcc)
9363     // Only do this before legalize for now.
9364     if (VT.isVector() && !LegalOperations) {
9365       EVT N00VT = N0.getOperand(0).getValueType();
9366       if (getSetCCResultType(N00VT) == N0.getValueType())
9367         return SDValue();
9368 
9369       // We know that the # elements of the results is the same as the
9370       // # elements of the compare (and the # elements of the compare result
9371       // for that matter).  Check to see that they are the same size.  If so,
9372       // we know that the element size of the sext'd result matches the
9373       // element size of the compare operands.
9374       if (VT.getSizeInBits() == N00VT.getSizeInBits())
9375         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9376                              N0.getOperand(1),
9377                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
9378 
9379       // If the desired elements are smaller or larger than the source
9380       // elements we can use a matching integer vector type and then
9381       // truncate/any extend
9382       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9383       SDValue VsetCC =
9384         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9385                       N0.getOperand(1),
9386                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
9387       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9388     }
9389 
9390     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9391     SDLoc DL(N);
9392     if (SDValue SCC = SimplifySelectCC(
9393             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9394             DAG.getConstant(0, DL, VT),
9395             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9396       return SCC;
9397   }
9398 
9399   return SDValue();
9400 }
9401 
9402 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9403   unsigned Opcode = N->getOpcode();
9404   SDValue N0 = N->getOperand(0);
9405   SDValue N1 = N->getOperand(1);
9406   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9407 
9408   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9409   if (N0.getOpcode() == Opcode &&
9410       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9411     return N0;
9412 
9413   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9414       N0.getOperand(0).getOpcode() == Opcode) {
9415     // We have an assert, truncate, assert sandwich. Make one stronger assert
9416     // by asserting on the smallest asserted type to the larger source type.
9417     // This eliminates the later assert:
9418     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9419     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9420     SDValue BigA = N0.getOperand(0);
9421     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9422     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9423            "Asserting zero/sign-extended bits to a type larger than the "
9424            "truncated destination does not provide information");
9425 
9426     SDLoc DL(N);
9427     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9428     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9429     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9430                                     BigA.getOperand(0), MinAssertVTVal);
9431     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9432   }
9433 
9434   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
9435   // than X. Just move the AssertZext in front of the truncate and drop the
9436   // AssertSExt.
9437   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9438       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9439       Opcode == ISD::AssertZext) {
9440     SDValue BigA = N0.getOperand(0);
9441     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9442     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9443            "Asserting zero/sign-extended bits to a type larger than the "
9444            "truncated destination does not provide information");
9445 
9446     if (AssertVT.bitsLT(BigA_AssertVT)) {
9447       SDLoc DL(N);
9448       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9449                                       BigA.getOperand(0), N1);
9450       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9451     }
9452   }
9453 
9454   return SDValue();
9455 }
9456 
9457 /// If the result of a wider load is shifted to right of N  bits and then
9458 /// truncated to a narrower type and where N is a multiple of number of bits of
9459 /// the narrower type, transform it to a narrower load from address + N / num of
9460 /// bits of new type. Also narrow the load if the result is masked with an AND
9461 /// to effectively produce a smaller type. If the result is to be extended, also
9462 /// fold the extension to form a extending load.
9463 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9464   unsigned Opc = N->getOpcode();
9465 
9466   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9467   SDValue N0 = N->getOperand(0);
9468   EVT VT = N->getValueType(0);
9469   EVT ExtVT = VT;
9470 
9471   // This transformation isn't valid for vector loads.
9472   if (VT.isVector())
9473     return SDValue();
9474 
9475   unsigned ShAmt = 0;
9476   bool HasShiftedOffset = false;
9477   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9478   // extended to VT.
9479   if (Opc == ISD::SIGN_EXTEND_INREG) {
9480     ExtType = ISD::SEXTLOAD;
9481     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9482   } else if (Opc == ISD::SRL) {
9483     // Another special-case: SRL is basically zero-extending a narrower value,
9484     // or it maybe shifting a higher subword, half or byte into the lowest
9485     // bits.
9486     ExtType = ISD::ZEXTLOAD;
9487     N0 = SDValue(N, 0);
9488 
9489     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9490     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9491     if (!N01 || !LN0)
9492       return SDValue();
9493 
9494     uint64_t ShiftAmt = N01->getZExtValue();
9495     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9496     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9497       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9498     else
9499       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9500                                 VT.getSizeInBits() - ShiftAmt);
9501   } else if (Opc == ISD::AND) {
9502     // An AND with a constant mask is the same as a truncate + zero-extend.
9503     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9504     if (!AndC)
9505       return SDValue();
9506 
9507     const APInt &Mask = AndC->getAPIntValue();
9508     unsigned ActiveBits = 0;
9509     if (Mask.isMask()) {
9510       ActiveBits = Mask.countTrailingOnes();
9511     } else if (Mask.isShiftedMask()) {
9512       ShAmt = Mask.countTrailingZeros();
9513       APInt ShiftedMask = Mask.lshr(ShAmt);
9514       ActiveBits = ShiftedMask.countTrailingOnes();
9515       HasShiftedOffset = true;
9516     } else
9517       return SDValue();
9518 
9519     ExtType = ISD::ZEXTLOAD;
9520     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9521   }
9522 
9523   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9524     SDValue SRL = N0;
9525     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9526       ShAmt = ConstShift->getZExtValue();
9527       unsigned EVTBits = ExtVT.getSizeInBits();
9528       // Is the shift amount a multiple of size of VT?
9529       if ((ShAmt & (EVTBits-1)) == 0) {
9530         N0 = N0.getOperand(0);
9531         // Is the load width a multiple of size of VT?
9532         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9533           return SDValue();
9534       }
9535 
9536       // At this point, we must have a load or else we can't do the transform.
9537       if (!isa<LoadSDNode>(N0)) return SDValue();
9538 
9539       auto *LN0 = cast<LoadSDNode>(N0);
9540 
9541       // Because a SRL must be assumed to *need* to zero-extend the high bits
9542       // (as opposed to anyext the high bits), we can't combine the zextload
9543       // lowering of SRL and an sextload.
9544       if (LN0->getExtensionType() == ISD::SEXTLOAD)
9545         return SDValue();
9546 
9547       // If the shift amount is larger than the input type then we're not
9548       // accessing any of the loaded bytes.  If the load was a zextload/extload
9549       // then the result of the shift+trunc is zero/undef (handled elsewhere).
9550       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9551         return SDValue();
9552 
9553       // If the SRL is only used by a masking AND, we may be able to adjust
9554       // the ExtVT to make the AND redundant.
9555       SDNode *Mask = *(SRL->use_begin());
9556       if (Mask->getOpcode() == ISD::AND &&
9557           isa<ConstantSDNode>(Mask->getOperand(1))) {
9558         const APInt &ShiftMask =
9559           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9560         if (ShiftMask.isMask()) {
9561           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9562                                            ShiftMask.countTrailingOnes());
9563           // If the mask is smaller, recompute the type.
9564           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9565               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9566             ExtVT = MaskedVT;
9567         }
9568       }
9569     }
9570   }
9571 
9572   // If the load is shifted left (and the result isn't shifted back right),
9573   // we can fold the truncate through the shift.
9574   unsigned ShLeftAmt = 0;
9575   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9576       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9577     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9578       ShLeftAmt = N01->getZExtValue();
9579       N0 = N0.getOperand(0);
9580     }
9581   }
9582 
9583   // If we haven't found a load, we can't narrow it.
9584   if (!isa<LoadSDNode>(N0))
9585     return SDValue();
9586 
9587   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9588   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9589     return SDValue();
9590 
9591   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9592     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9593     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9594     return LVTStoreBits - EVTStoreBits - ShAmt;
9595   };
9596 
9597   // For big endian targets, we need to adjust the offset to the pointer to
9598   // load the correct bytes.
9599   if (DAG.getDataLayout().isBigEndian())
9600     ShAmt = AdjustBigEndianShift(ShAmt);
9601 
9602   EVT PtrType = N0.getOperand(1).getValueType();
9603   uint64_t PtrOff = ShAmt / 8;
9604   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9605   SDLoc DL(LN0);
9606   // The original load itself didn't wrap, so an offset within it doesn't.
9607   SDNodeFlags Flags;
9608   Flags.setNoUnsignedWrap(true);
9609   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9610                                PtrType, LN0->getBasePtr(),
9611                                DAG.getConstant(PtrOff, DL, PtrType),
9612                                Flags);
9613   AddToWorklist(NewPtr.getNode());
9614 
9615   SDValue Load;
9616   if (ExtType == ISD::NON_EXTLOAD)
9617     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9618                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9619                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9620   else
9621     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9622                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9623                           NewAlign, LN0->getMemOperand()->getFlags(),
9624                           LN0->getAAInfo());
9625 
9626   // Replace the old load's chain with the new load's chain.
9627   WorklistRemover DeadNodes(*this);
9628   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9629 
9630   // Shift the result left, if we've swallowed a left shift.
9631   SDValue Result = Load;
9632   if (ShLeftAmt != 0) {
9633     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9634     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9635       ShImmTy = VT;
9636     // If the shift amount is as large as the result size (but, presumably,
9637     // no larger than the source) then the useful bits of the result are
9638     // zero; we can't simply return the shortened shift, because the result
9639     // of that operation is undefined.
9640     SDLoc DL(N0);
9641     if (ShLeftAmt >= VT.getSizeInBits())
9642       Result = DAG.getConstant(0, DL, VT);
9643     else
9644       Result = DAG.getNode(ISD::SHL, DL, VT,
9645                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9646   }
9647 
9648   if (HasShiftedOffset) {
9649     // Recalculate the shift amount after it has been altered to calculate
9650     // the offset.
9651     if (DAG.getDataLayout().isBigEndian())
9652       ShAmt = AdjustBigEndianShift(ShAmt);
9653 
9654     // We're using a shifted mask, so the load now has an offset. This means
9655     // that data has been loaded into the lower bytes than it would have been
9656     // before, so we need to shl the loaded data into the correct position in the
9657     // register.
9658     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9659     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9660     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9661   }
9662 
9663   // Return the new loaded value.
9664   return Result;
9665 }
9666 
9667 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9668   SDValue N0 = N->getOperand(0);
9669   SDValue N1 = N->getOperand(1);
9670   EVT VT = N->getValueType(0);
9671   EVT EVT = cast<VTSDNode>(N1)->getVT();
9672   unsigned VTBits = VT.getScalarSizeInBits();
9673   unsigned EVTBits = EVT.getScalarSizeInBits();
9674 
9675   if (N0.isUndef())
9676     return DAG.getUNDEF(VT);
9677 
9678   // fold (sext_in_reg c1) -> c1
9679   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9680     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9681 
9682   // If the input is already sign extended, just drop the extension.
9683   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9684     return N0;
9685 
9686   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9687   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9688       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9689     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9690                        N0.getOperand(0), N1);
9691 
9692   // fold (sext_in_reg (sext x)) -> (sext x)
9693   // fold (sext_in_reg (aext x)) -> (sext x)
9694   // if x is small enough or if we know that x has more than 1 sign bit and the
9695   // sign_extend_inreg is extending from one of them.
9696   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9697     SDValue N00 = N0.getOperand(0);
9698     unsigned N00Bits = N00.getScalarValueSizeInBits();
9699     if ((N00Bits <= EVTBits ||
9700          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9701         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9702       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9703   }
9704 
9705   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9706   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9707        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9708        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9709       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9710     if (!LegalOperations ||
9711         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9712       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9713                          N0.getOperand(0));
9714   }
9715 
9716   // fold (sext_in_reg (zext x)) -> (sext x)
9717   // iff we are extending the source sign bit.
9718   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9719     SDValue N00 = N0.getOperand(0);
9720     if (N00.getScalarValueSizeInBits() == EVTBits &&
9721         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9722       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
9723   }
9724 
9725   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9726   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9727     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9728 
9729   // fold operands of sext_in_reg based on knowledge that the top bits are not
9730   // demanded.
9731   if (SimplifyDemandedBits(SDValue(N, 0)))
9732     return SDValue(N, 0);
9733 
9734   // fold (sext_in_reg (load x)) -> (smaller sextload x)
9735   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9736   if (SDValue NarrowLoad = ReduceLoadWidth(N))
9737     return NarrowLoad;
9738 
9739   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9740   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9741   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9742   if (N0.getOpcode() == ISD::SRL) {
9743     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9744       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9745         // We can turn this into an SRA iff the input to the SRL is already sign
9746         // extended enough.
9747         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9748         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9749           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9750                              N0.getOperand(0), N0.getOperand(1));
9751       }
9752   }
9753 
9754   // fold (sext_inreg (extload x)) -> (sextload x)
9755   // If sextload is not supported by target, we can only do the combine when
9756   // load has one use. Doing otherwise can block folding the extload with other
9757   // extends that the target does support.
9758   if (ISD::isEXTLoad(N0.getNode()) &&
9759       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9760       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9761       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9762         N0.hasOneUse()) ||
9763        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9764     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9765     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9766                                      LN0->getChain(),
9767                                      LN0->getBasePtr(), EVT,
9768                                      LN0->getMemOperand());
9769     CombineTo(N, ExtLoad);
9770     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9771     AddToWorklist(ExtLoad.getNode());
9772     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9773   }
9774   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9775   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9776       N0.hasOneUse() &&
9777       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9778       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9779        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9780     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9781     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9782                                      LN0->getChain(),
9783                                      LN0->getBasePtr(), EVT,
9784                                      LN0->getMemOperand());
9785     CombineTo(N, ExtLoad);
9786     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9787     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9788   }
9789 
9790   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9791   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9792     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9793                                            N0.getOperand(1), false))
9794       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9795                          BSwap, N1);
9796   }
9797 
9798   return SDValue();
9799 }
9800 
9801 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9802   SDValue N0 = N->getOperand(0);
9803   EVT VT = N->getValueType(0);
9804 
9805   if (N0.isUndef())
9806     return DAG.getUNDEF(VT);
9807 
9808   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9809     return Res;
9810 
9811   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9812     return SDValue(N, 0);
9813 
9814   return SDValue();
9815 }
9816 
9817 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
9818   SDValue N0 = N->getOperand(0);
9819   EVT VT = N->getValueType(0);
9820 
9821   if (N0.isUndef())
9822     return DAG.getUNDEF(VT);
9823 
9824   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9825     return Res;
9826 
9827   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
9828     return SDValue(N, 0);
9829 
9830   return SDValue();
9831 }
9832 
9833 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
9834   SDValue N0 = N->getOperand(0);
9835   EVT VT = N->getValueType(0);
9836   bool isLE = DAG.getDataLayout().isLittleEndian();
9837 
9838   // noop truncate
9839   if (N0.getValueType() == N->getValueType(0))
9840     return N0;
9841 
9842   // fold (truncate (truncate x)) -> (truncate x)
9843   if (N0.getOpcode() == ISD::TRUNCATE)
9844     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9845 
9846   // fold (truncate c1) -> c1
9847   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
9848     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
9849     if (C.getNode() != N)
9850       return C;
9851   }
9852 
9853   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
9854   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
9855       N0.getOpcode() == ISD::SIGN_EXTEND ||
9856       N0.getOpcode() == ISD::ANY_EXTEND) {
9857     // if the source is smaller than the dest, we still need an extend.
9858     if (N0.getOperand(0).getValueType().bitsLT(VT))
9859       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9860     // if the source is larger than the dest, than we just need the truncate.
9861     if (N0.getOperand(0).getValueType().bitsGT(VT))
9862       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
9863     // if the source and dest are the same type, we can drop both the extend
9864     // and the truncate.
9865     return N0.getOperand(0);
9866   }
9867 
9868   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
9869   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
9870     return SDValue();
9871 
9872   // Fold extract-and-trunc into a narrow extract. For example:
9873   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
9874   //   i32 y = TRUNCATE(i64 x)
9875   //        -- becomes --
9876   //   v16i8 b = BITCAST (v2i64 val)
9877   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
9878   //
9879   // Note: We only run this optimization after type legalization (which often
9880   // creates this pattern) and before operation legalization after which
9881   // we need to be more careful about the vector instructions that we generate.
9882   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9883       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
9884     EVT VecTy = N0.getOperand(0).getValueType();
9885     EVT ExTy = N0.getValueType();
9886     EVT TrTy = N->getValueType(0);
9887 
9888     unsigned NumElem = VecTy.getVectorNumElements();
9889     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
9890 
9891     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
9892     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
9893 
9894     SDValue EltNo = N0->getOperand(1);
9895     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
9896       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
9897       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
9898       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
9899 
9900       SDLoc DL(N);
9901       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
9902                          DAG.getBitcast(NVT, N0.getOperand(0)),
9903                          DAG.getConstant(Index, DL, IndexTy));
9904     }
9905   }
9906 
9907   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
9908   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
9909     EVT SrcVT = N0.getValueType();
9910     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
9911         TLI.isTruncateFree(SrcVT, VT)) {
9912       SDLoc SL(N0);
9913       SDValue Cond = N0.getOperand(0);
9914       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
9915       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
9916       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
9917     }
9918   }
9919 
9920   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
9921   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9922       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
9923       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
9924     SDValue Amt = N0.getOperand(1);
9925     KnownBits Known = DAG.computeKnownBits(Amt);
9926     unsigned Size = VT.getScalarSizeInBits();
9927     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
9928       SDLoc SL(N);
9929       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
9930 
9931       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
9932       if (AmtVT != Amt.getValueType()) {
9933         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
9934         AddToWorklist(Amt.getNode());
9935       }
9936       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
9937     }
9938   }
9939 
9940   // Fold a series of buildvector, bitcast, and truncate if possible.
9941   // For example fold
9942   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
9943   //   (2xi32 (buildvector x, y)).
9944   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
9945       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
9946       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
9947       N0.getOperand(0).hasOneUse()) {
9948     SDValue BuildVect = N0.getOperand(0);
9949     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
9950     EVT TruncVecEltTy = VT.getVectorElementType();
9951 
9952     // Check that the element types match.
9953     if (BuildVectEltTy == TruncVecEltTy) {
9954       // Now we only need to compute the offset of the truncated elements.
9955       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
9956       unsigned TruncVecNumElts = VT.getVectorNumElements();
9957       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
9958 
9959       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
9960              "Invalid number of elements");
9961 
9962       SmallVector<SDValue, 8> Opnds;
9963       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
9964         Opnds.push_back(BuildVect.getOperand(i));
9965 
9966       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
9967     }
9968   }
9969 
9970   // See if we can simplify the input to this truncate through knowledge that
9971   // only the low bits are being used.
9972   // For example "trunc (or (shl x, 8), y)" // -> trunc y
9973   // Currently we only perform this optimization on scalars because vectors
9974   // may have different active low bits.
9975   if (!VT.isVector()) {
9976     APInt Mask =
9977         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
9978     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
9979       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
9980   }
9981 
9982   // fold (truncate (load x)) -> (smaller load x)
9983   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
9984   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
9985     if (SDValue Reduced = ReduceLoadWidth(N))
9986       return Reduced;
9987 
9988     // Handle the case where the load remains an extending load even
9989     // after truncation.
9990     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
9991       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9992       if (!LN0->isVolatile() &&
9993           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
9994         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
9995                                          VT, LN0->getChain(), LN0->getBasePtr(),
9996                                          LN0->getMemoryVT(),
9997                                          LN0->getMemOperand());
9998         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
9999         return NewLoad;
10000       }
10001     }
10002   }
10003 
10004   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10005   // where ... are all 'undef'.
10006   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10007     SmallVector<EVT, 8> VTs;
10008     SDValue V;
10009     unsigned Idx = 0;
10010     unsigned NumDefs = 0;
10011 
10012     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10013       SDValue X = N0.getOperand(i);
10014       if (!X.isUndef()) {
10015         V = X;
10016         Idx = i;
10017         NumDefs++;
10018       }
10019       // Stop if more than one members are non-undef.
10020       if (NumDefs > 1)
10021         break;
10022       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10023                                      VT.getVectorElementType(),
10024                                      X.getValueType().getVectorNumElements()));
10025     }
10026 
10027     if (NumDefs == 0)
10028       return DAG.getUNDEF(VT);
10029 
10030     if (NumDefs == 1) {
10031       assert(V.getNode() && "The single defined operand is empty!");
10032       SmallVector<SDValue, 8> Opnds;
10033       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10034         if (i != Idx) {
10035           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10036           continue;
10037         }
10038         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10039         AddToWorklist(NV.getNode());
10040         Opnds.push_back(NV);
10041       }
10042       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10043     }
10044   }
10045 
10046   // Fold truncate of a bitcast of a vector to an extract of the low vector
10047   // element.
10048   //
10049   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10050   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10051     SDValue VecSrc = N0.getOperand(0);
10052     EVT SrcVT = VecSrc.getValueType();
10053     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10054         (!LegalOperations ||
10055          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10056       SDLoc SL(N);
10057 
10058       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10059       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10060       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10061                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10062     }
10063   }
10064 
10065   // Simplify the operands using demanded-bits information.
10066   if (!VT.isVector() &&
10067       SimplifyDemandedBits(SDValue(N, 0)))
10068     return SDValue(N, 0);
10069 
10070   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10071   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10072   // When the adde's carry is not used.
10073   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10074       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10075       (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
10076     SDLoc SL(N);
10077     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10078     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10079     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10080     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10081   }
10082 
10083   // fold (truncate (extract_subvector(ext x))) ->
10084   //      (extract_subvector x)
10085   // TODO: This can be generalized to cover cases where the truncate and extract
10086   // do not fully cancel each other out.
10087   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10088     SDValue N00 = N0.getOperand(0);
10089     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10090         N00.getOpcode() == ISD::ZERO_EXTEND ||
10091         N00.getOpcode() == ISD::ANY_EXTEND) {
10092       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10093           VT.getVectorElementType())
10094         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10095                            N00.getOperand(0), N0.getOperand(1));
10096     }
10097   }
10098 
10099   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10100     return NewVSel;
10101 
10102   // Narrow a suitable binary operation with a non-opaque constant operand by
10103   // moving it ahead of the truncate. This is limited to pre-legalization
10104   // because targets may prefer a wider type during later combines and invert
10105   // this transform.
10106   switch (N0.getOpcode()) {
10107   case ISD::ADD:
10108   case ISD::SUB:
10109   case ISD::MUL:
10110   case ISD::AND:
10111   case ISD::OR:
10112   case ISD::XOR:
10113     if (!LegalOperations && N0.hasOneUse() &&
10114         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10115          isConstantOrConstantVector(N0.getOperand(1), true))) {
10116       // TODO: We already restricted this to pre-legalization, but for vectors
10117       // we are extra cautious to not create an unsupported operation.
10118       // Target-specific changes are likely needed to avoid regressions here.
10119       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10120         SDLoc DL(N);
10121         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10122         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10123         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10124       }
10125     }
10126   }
10127 
10128   return SDValue();
10129 }
10130 
10131 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10132   SDValue Elt = N->getOperand(i);
10133   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10134     return Elt.getNode();
10135   return Elt.getOperand(Elt.getResNo()).getNode();
10136 }
10137 
10138 /// build_pair (load, load) -> load
10139 /// if load locations are consecutive.
10140 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10141   assert(N->getOpcode() == ISD::BUILD_PAIR);
10142 
10143   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10144   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10145 
10146   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10147   // most significant part in elt 1. So when combining into one large load, we
10148   // need to consider the endianness.
10149   if (DAG.getDataLayout().isBigEndian())
10150     std::swap(LD1, LD2);
10151 
10152   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10153       LD1->getAddressSpace() != LD2->getAddressSpace())
10154     return SDValue();
10155   EVT LD1VT = LD1->getValueType(0);
10156   unsigned LD1Bytes = LD1VT.getStoreSize();
10157   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10158       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10159     unsigned Align = LD1->getAlignment();
10160     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10161         VT.getTypeForEVT(*DAG.getContext()));
10162 
10163     if (NewAlign <= Align &&
10164         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10165       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10166                          LD1->getPointerInfo(), Align);
10167   }
10168 
10169   return SDValue();
10170 }
10171 
10172 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10173   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10174   // and Lo parts; on big-endian machines it doesn't.
10175   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10176 }
10177 
10178 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10179                                     const TargetLowering &TLI) {
10180   // If this is not a bitcast to an FP type or if the target doesn't have
10181   // IEEE754-compliant FP logic, we're done.
10182   EVT VT = N->getValueType(0);
10183   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10184     return SDValue();
10185 
10186   // TODO: Handle cases where the integer constant is a different scalar
10187   // bitwidth to the FP.
10188   SDValue N0 = N->getOperand(0);
10189   EVT SourceVT = N0.getValueType();
10190   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10191     return SDValue();
10192 
10193   unsigned FPOpcode;
10194   APInt SignMask;
10195   switch (N0.getOpcode()) {
10196   case ISD::AND:
10197     FPOpcode = ISD::FABS;
10198     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10199     break;
10200   case ISD::XOR:
10201     FPOpcode = ISD::FNEG;
10202     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10203     break;
10204   case ISD::OR:
10205     FPOpcode = ISD::FABS;
10206     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10207     break;
10208   default:
10209     return SDValue();
10210   }
10211 
10212   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10213   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10214   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10215   //   fneg (fabs X)
10216   SDValue LogicOp0 = N0.getOperand(0);
10217   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10218   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10219       LogicOp0.getOpcode() == ISD::BITCAST &&
10220       LogicOp0.getOperand(0).getValueType() == VT) {
10221     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10222     NumFPLogicOpsConv++;
10223     if (N0.getOpcode() == ISD::OR)
10224       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10225     return FPOp;
10226   }
10227 
10228   return SDValue();
10229 }
10230 
/// Try to simplify the BITCAST node \p N.
///
/// The folds are attempted in the order written below: bitcast of undef,
/// constant folding of a constant BUILD_VECTOR or scalar constant operand,
/// collapsing nested bitcasts, retyping a load, the bitcasted FP logic folds
/// (foldBitcastedFPLogic), rewriting fneg/fabs/fcopysign as integer bit
/// operations, merging a BUILD_PAIR of consecutive loads, and looking through
/// bitcasts around a vector shuffle.
///
/// \returns the replacement value produced by the first fold that fires, or an
/// empty SDValue if none does.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // A bitcast of undef is undef of the result type.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize types, unless both types are integer and the
  // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
  // First check to see if this is all constant.
  // TODO: Support FP bitcasts after legalize types.
  if (VT.isVector() &&
      (!LegalTypes ||
       (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
        TLI.isTypeLegal(VT.getVectorElementType()))) &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      cast<BuildVectorSDNode>(N0)->isConstant())
    return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
                                             VT.getVectorElementType());

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
    if (!LegalOperations ||
        (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
        (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
         TLI.isOperationLegal(ISD::Constant, VT))) {
      SDValue C = DAG.getBitcast(VT, N0);
      // Only report a change if getBitcast did not simply hand back N itself.
      if (C.getNode() != N)
        return C;
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getBitcast(VT, N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not remove the cast if the types differ in endian layout.
      TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
          TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
      // If the load is volatile, we only want to change the load type if the
      // resulting load is legal. Otherwise we might increase the number of
      // memory accesses. We don't care if the original type was legal or not
      // as we assume software couldn't rely on the number of accesses of an
      // illegal type.
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isOperationLegal(ISD::LOAD, VT)) &&
      TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned OrigAlign = LN0->getAlignment();

    // Only retype the load if the target both allows an access of type VT at
    // the original alignment and reports it as fast.
    bool Fast = false;
    if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                               LN0->getAddressSpace(), OrigAlign, &Fast) &&
        Fast) {
      SDValue Load =
          DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
                      LN0->getPointerInfo(), OrigAlign,
                      LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
      // Move the users of the old load's result #1 over to result #1 of the
      // new load (presumably the output chain of a normal load).
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
      return Load;
    }
  }

  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
    return V;

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  //
  // For ppc_fp128:
  // fold (bitcast (fneg x)) ->
  //     flipbit = signbit
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  //
  // fold (bitcast (fabs x)) ->
  //     flipbit = (and (extract_element (bitcast x), 0), signbit)
  //     (xor (bitcast x) (build_pair flipbit, flipbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
    AddToWorklist(NewConv.getNode());

    SDLoc DL(N);
    if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
      assert(VT.getSizeInBits() == 128);
      // VT is 128 bits wide here, so this is the sign mask of one i64 half.
      SDValue SignBit = DAG.getConstant(
          APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
      SDValue FlipBit;
      if (N0.getOpcode() == ISD::FNEG) {
        // fneg: unconditionally flip the sign bit of both halves.
        FlipBit = SignBit;
        AddToWorklist(FlipBit.getNode());
      } else {
        assert(N0.getOpcode() == ISD::FABS);
        // fabs: flip the sign bit of both halves only when the sign bit of
        // the half selected by getPPCf128HiElementSelector is set.
        SDValue Hi =
            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
                        DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                              SDLoc(NewConv)));
        AddToWorklist(Hi.getNode());
        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
        AddToWorklist(FlipBit.getNode());
      }
      SDValue FlipBits =
          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
      AddToWorklist(FlipBits.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
    }
    // Generic scalar case: operate on the single sign bit of VT.
    APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, VT,
                         NewConv, DAG.getConstant(SignBit, DL, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, VT,
                       NewConv, DAG.getConstant(~SignBit, DL, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  //
  // For ppc_fp128:
  // fold (bitcast (fcopysign cst, x)) ->
  //     flipbit = (and (extract_element
  //                     (xor (bitcast cst), (bitcast x)), 0),
  //                    signbit)
  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    // The sign operand may have a different width than the result; work on an
    // integer of the sign operand's own width first.
    unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
      AddToWorklist(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
        AddToWorklist(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        SDLoc DL(X);
        X = DAG.getNode(ISD::SRL, DL,
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, DL,
                                        X.getValueType()));
        AddToWorklist(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
        AddToWorklist(X.getNode());
      }

      if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
        APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
        SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
        AddToWorklist(Cst.getNode());
        // NOTE(review): this X shadows the width-adjusted X computed above;
        // the ppcf128 path uses a direct bitcast of the sign operand to VT.
        SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
        AddToWorklist(X.getNode());
        // The XOR has its sign bit set exactly where cst and x disagree in
        // sign, i.e. where the sign of cst has to be flipped.
        SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
        AddToWorklist(XorResult.getNode());
        SDValue XorResult64 = DAG.getNode(
            ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
            DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
                                  SDLoc(XorResult)));
        AddToWorklist(XorResult64.getNode());
        SDValue FlipBit =
            DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
                        DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
        AddToWorklist(FlipBit.getNode());
        SDValue FlipBits =
            DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
        AddToWorklist(FlipBits.getNode());
        return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
      }
      // Generic case: keep only the sign bit of X ...
      APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                      X, DAG.getConstant(SignBit, SDLoc(X), VT));
      AddToWorklist(X.getNode());

      // ... and everything but the sign bit of the constant, then merge them.
      SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
                        Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
      AddToWorklist(Cst.getNode());

      return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR)
    if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
      return CombineLD;

  // Remove double bitcasts from shuffles - this is often a legacy of
  // XformToShuffleWithZero being used to combine bitmaskings (of
  // float vectors bitcast to integer vectors) into shuffles.
  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
  //
  // The result type must have a whole multiple of the shuffle's element count
  // so that every original mask entry maps onto whole result elements.
  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
      N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
      VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
      !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);

    // If operands are a bitcast, peek through if it casts the original VT.
    // If operands are a constant, just bitcast back to original VT.
    // Anything else yields an empty SDValue, which abandons the fold.
    auto PeekThroughBitcast = [&](SDValue Op) {
      if (Op.getOpcode() == ISD::BITCAST &&
          Op.getOperand(0).getValueType() == VT)
        return SDValue(Op.getOperand(0));
      if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
          ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
        return DAG.getBitcast(VT, Op);
      return SDValue();
    };

    // FIXME: If either input vector is bitcast, try to convert the shuffle to
    // the result type of this bitcast. This would eliminate at least one
    // bitcast. See the transform in InstCombine.
    SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
    SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
    if (!(SV0 && SV1))
      return SDValue();

    // Expand every original mask entry into MaskScale consecutive entries of
    // the result type; negative entries become -1.
    int MaskScale =
        VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
    SmallVector<int, 8> NewMask;
    for (int M : SVN->getMask())
      for (int i = 0; i != MaskScale; ++i)
        NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);

    // If the scaled mask is not legal as-is, retry with the two shuffle
    // operands (and the mask) commuted.
    bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    if (!LegalMask) {
      std::swap(SV0, SV1);
      ShuffleVectorSDNode::commuteMask(NewMask);
      LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
    }

    if (LegalMask)
      return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
  }

  return SDValue();
}
10487 
10488 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10489   EVT VT = N->getValueType(0);
10490   return CombineConsecutiveLoads(N, VT);
10491 }
10492 
10493 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10494 /// operands. DstEltVT indicates the destination element value type.
10495 SDValue DAGCombiner::
10496 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10497   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10498 
10499   // If this is already the right type, we're done.
10500   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10501 
10502   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10503   unsigned DstBitSize = DstEltVT.getSizeInBits();
10504 
10505   // If this is a conversion of N elements of one type to N elements of another
10506   // type, convert each element.  This handles FP<->INT cases.
10507   if (SrcBitSize == DstBitSize) {
10508     SmallVector<SDValue, 8> Ops;
10509     for (SDValue Op : BV->op_values()) {
10510       // If the vector element type is not legal, the BUILD_VECTOR operands
10511       // are promoted and implicitly truncated.  Make that explicit here.
10512       if (Op.getValueType() != SrcEltVT)
10513         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10514       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10515       AddToWorklist(Ops.back().getNode());
10516     }
10517     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10518                               BV->getValueType(0).getVectorNumElements());
10519     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10520   }
10521 
10522   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10523   // handle annoying details of growing/shrinking FP values, we convert them to
10524   // int first.
10525   if (SrcEltVT.isFloatingPoint()) {
10526     // Convert the input float vector to a int vector where the elements are the
10527     // same sizes.
10528     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10529     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10530     SrcEltVT = IntVT;
10531   }
10532 
10533   // Now we know the input is an integer vector.  If the output is a FP type,
10534   // convert to integer first, then to FP of the right size.
10535   if (DstEltVT.isFloatingPoint()) {
10536     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10537     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10538 
10539     // Next, convert to FP elements of the same size.
10540     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10541   }
10542 
10543   SDLoc DL(BV);
10544 
10545   // Okay, we know the src/dst types are both integers of differing types.
10546   // Handling growing first.
10547   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10548   if (SrcBitSize < DstBitSize) {
10549     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10550 
10551     SmallVector<SDValue, 8> Ops;
10552     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10553          i += NumInputsPerOutput) {
10554       bool isLE = DAG.getDataLayout().isLittleEndian();
10555       APInt NewBits = APInt(DstBitSize, 0);
10556       bool EltIsUndef = true;
10557       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10558         // Shift the previously computed bits over.
10559         NewBits <<= SrcBitSize;
10560         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10561         if (Op.isUndef()) continue;
10562         EltIsUndef = false;
10563 
10564         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10565                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10566       }
10567 
10568       if (EltIsUndef)
10569         Ops.push_back(DAG.getUNDEF(DstEltVT));
10570       else
10571         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10572     }
10573 
10574     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10575     return DAG.getBuildVector(VT, DL, Ops);
10576   }
10577 
10578   // Finally, this must be the case where we are shrinking elements: each input
10579   // turns into multiple outputs.
10580   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10581   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10582                             NumOutputsPerInput*BV->getNumOperands());
10583   SmallVector<SDValue, 8> Ops;
10584 
10585   for (const SDValue &Op : BV->op_values()) {
10586     if (Op.isUndef()) {
10587       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10588       continue;
10589     }
10590 
10591     APInt OpVal = cast<ConstantSDNode>(Op)->
10592                   getAPIntValue().zextOrTrunc(SrcBitSize);
10593 
10594     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10595       APInt ThisVal = OpVal.trunc(DstBitSize);
10596       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10597       OpVal.lshrInPlace(DstBitSize);
10598     }
10599 
10600     // For big endian targets, swap the order of the pieces of each element.
10601     if (DAG.getDataLayout().isBigEndian())
10602       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10603   }
10604 
10605   return DAG.getBuildVector(VT, DL, Ops);
10606 }
10607 
10608 static bool isContractable(SDNode *N) {
10609   SDNodeFlags F = N->getFlags();
10610   return F.hasAllowContract() || F.hasAllowReassociation();
10611 }
10612 
10613 /// Try to perform FMA combining on a given FADD node.
10614 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10615   SDValue N0 = N->getOperand(0);
10616   SDValue N1 = N->getOperand(1);
10617   EVT VT = N->getValueType(0);
10618   SDLoc SL(N);
10619 
10620   const TargetOptions &Options = DAG.getTarget().Options;
10621 
10622   // Floating-point multiply-add with intermediate rounding.
10623   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10624 
10625   // Floating-point multiply-add without intermediate rounding.
10626   bool HasFMA =
10627       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10628       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10629 
10630   // No valid opcode, do not combine.
10631   if (!HasFMAD && !HasFMA)
10632     return SDValue();
10633 
10634   SDNodeFlags Flags = N->getFlags();
10635   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10636   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10637                               CanFuse || HasFMAD);
10638   // If the addition is not contractable, do not combine.
10639   if (!AllowFusionGlobally && !isContractable(N))
10640     return SDValue();
10641 
10642   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10643   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10644     return SDValue();
10645 
10646   // Always prefer FMAD to FMA for precision.
10647   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10648   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10649 
10650   // Is the node an FMUL and contractable either due to global flags or
10651   // SDNodeFlags.
10652   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10653     if (N.getOpcode() != ISD::FMUL)
10654       return false;
10655     return AllowFusionGlobally || isContractable(N.getNode());
10656   };
10657   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10658   // prefer to fold the multiply with fewer uses.
10659   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10660     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10661       std::swap(N0, N1);
10662   }
10663 
10664   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10665   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10666     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10667                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10668   }
10669 
10670   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10671   // Note: Commutes FADD operands.
10672   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10673     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10674                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10675   }
10676 
10677   // Look through FP_EXTEND nodes to do more combining.
10678 
10679   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10680   if (N0.getOpcode() == ISD::FP_EXTEND) {
10681     SDValue N00 = N0.getOperand(0);
10682     if (isContractableFMUL(N00) &&
10683         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10684       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10685                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10686                                      N00.getOperand(0)),
10687                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10688                                      N00.getOperand(1)), N1, Flags);
10689     }
10690   }
10691 
10692   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10693   // Note: Commutes FADD operands.
10694   if (N1.getOpcode() == ISD::FP_EXTEND) {
10695     SDValue N10 = N1.getOperand(0);
10696     if (isContractableFMUL(N10) &&
10697         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10698       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10699                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10700                                      N10.getOperand(0)),
10701                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10702                                      N10.getOperand(1)), N0, Flags);
10703     }
10704   }
10705 
10706   // More folding opportunities when target permits.
10707   if (Aggressive) {
10708     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
10709     if (CanFuse &&
10710         N0.getOpcode() == PreferredFusedOpcode &&
10711         N0.getOperand(2).getOpcode() == ISD::FMUL &&
10712         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10713       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10714                          N0.getOperand(0), N0.getOperand(1),
10715                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10716                                      N0.getOperand(2).getOperand(0),
10717                                      N0.getOperand(2).getOperand(1),
10718                                      N1, Flags), Flags);
10719     }
10720 
10721     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
10722     if (CanFuse &&
10723         N1->getOpcode() == PreferredFusedOpcode &&
10724         N1.getOperand(2).getOpcode() == ISD::FMUL &&
10725         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10726       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10727                          N1.getOperand(0), N1.getOperand(1),
10728                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10729                                      N1.getOperand(2).getOperand(0),
10730                                      N1.getOperand(2).getOperand(1),
10731                                      N0, Flags), Flags);
10732     }
10733 
10734 
10735     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10736     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
10737     auto FoldFAddFMAFPExtFMul = [&] (
10738       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10739       SDNodeFlags Flags) {
10740       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10741                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10742                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10743                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10744                                      Z, Flags), Flags);
10745     };
10746     if (N0.getOpcode() == PreferredFusedOpcode) {
10747       SDValue N02 = N0.getOperand(2);
10748       if (N02.getOpcode() == ISD::FP_EXTEND) {
10749         SDValue N020 = N02.getOperand(0);
10750         if (isContractableFMUL(N020) &&
10751             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10752           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10753                                       N020.getOperand(0), N020.getOperand(1),
10754                                       N1, Flags);
10755         }
10756       }
10757     }
10758 
10759     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10760     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10761     // FIXME: This turns two single-precision and one double-precision
10762     // operation into two double-precision operations, which might not be
10763     // interesting for all targets, especially GPUs.
10764     auto FoldFAddFPExtFMAFMul = [&] (
10765       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10766       SDNodeFlags Flags) {
10767       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10768                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10769                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10770                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10771                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10772                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10773                                      Z, Flags), Flags);
10774     };
10775     if (N0.getOpcode() == ISD::FP_EXTEND) {
10776       SDValue N00 = N0.getOperand(0);
10777       if (N00.getOpcode() == PreferredFusedOpcode) {
10778         SDValue N002 = N00.getOperand(2);
10779         if (isContractableFMUL(N002) &&
10780             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10781           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10782                                       N002.getOperand(0), N002.getOperand(1),
10783                                       N1, Flags);
10784         }
10785       }
10786     }
10787 
10788     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
10789     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
10790     if (N1.getOpcode() == PreferredFusedOpcode) {
10791       SDValue N12 = N1.getOperand(2);
10792       if (N12.getOpcode() == ISD::FP_EXTEND) {
10793         SDValue N120 = N12.getOperand(0);
10794         if (isContractableFMUL(N120) &&
10795             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
10796           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
10797                                       N120.getOperand(0), N120.getOperand(1),
10798                                       N0, Flags);
10799         }
10800       }
10801     }
10802 
10803     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
10804     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
10805     // FIXME: This turns two single-precision and one double-precision
10806     // operation into two double-precision operations, which might not be
10807     // interesting for all targets, especially GPUs.
10808     if (N1.getOpcode() == ISD::FP_EXTEND) {
10809       SDValue N10 = N1.getOperand(0);
10810       if (N10.getOpcode() == PreferredFusedOpcode) {
10811         SDValue N102 = N10.getOperand(2);
10812         if (isContractableFMUL(N102) &&
10813             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10814           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
10815                                       N102.getOperand(0), N102.getOperand(1),
10816                                       N0, Flags);
10817         }
10818       }
10819     }
10820   }
10821 
10822   return SDValue();
10823 }
10824 
10825 /// Try to perform FMA combining on a given FSUB node.
10826 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
10827   SDValue N0 = N->getOperand(0);
10828   SDValue N1 = N->getOperand(1);
10829   EVT VT = N->getValueType(0);
10830   SDLoc SL(N);
10831 
10832   const TargetOptions &Options = DAG.getTarget().Options;
10833   // Floating-point multiply-add with intermediate rounding.
10834   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10835 
10836   // Floating-point multiply-add without intermediate rounding.
10837   bool HasFMA =
10838       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10839       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10840 
10841   // No valid opcode, do not combine.
10842   if (!HasFMAD && !HasFMA)
10843     return SDValue();
10844 
10845   const SDNodeFlags Flags = N->getFlags();
10846   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10847   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10848                               CanFuse || HasFMAD);
10849 
10850   // If the subtraction is not contractable, do not combine.
10851   if (!AllowFusionGlobally && !isContractable(N))
10852     return SDValue();
10853 
10854   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10855   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10856     return SDValue();
10857 
10858   // Always prefer FMAD to FMA for precision.
10859   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10860   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10861 
10862   // Is the node an FMUL and contractable either due to global flags or
10863   // SDNodeFlags.
10864   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10865     if (N.getOpcode() != ISD::FMUL)
10866       return false;
10867     return AllowFusionGlobally || isContractable(N.getNode());
10868   };
10869 
10870   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
10871   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10872     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10873                        N0.getOperand(0), N0.getOperand(1),
10874                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10875   }
10876 
10877   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
10878   // Note: Commutes FSUB operands.
10879   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10880     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10881                        DAG.getNode(ISD::FNEG, SL, VT,
10882                                    N1.getOperand(0)),
10883                        N1.getOperand(1), N0, Flags);
10884   }
10885 
10886   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
10887   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
10888       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
10889     SDValue N00 = N0.getOperand(0).getOperand(0);
10890     SDValue N01 = N0.getOperand(0).getOperand(1);
10891     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10892                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
10893                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10894   }
10895 
10896   // Look through FP_EXTEND nodes to do more combining.
10897 
10898   // fold (fsub (fpext (fmul x, y)), z)
10899   //   -> (fma (fpext x), (fpext y), (fneg z))
10900   if (N0.getOpcode() == ISD::FP_EXTEND) {
10901     SDValue N00 = N0.getOperand(0);
10902     if (isContractableFMUL(N00) &&
10903         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10904       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10905                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10906                                      N00.getOperand(0)),
10907                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10908                                      N00.getOperand(1)),
10909                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
10910     }
10911   }
10912 
10913   // fold (fsub x, (fpext (fmul y, z)))
10914   //   -> (fma (fneg (fpext y)), (fpext z), x)
10915   // Note: Commutes FSUB operands.
10916   if (N1.getOpcode() == ISD::FP_EXTEND) {
10917     SDValue N10 = N1.getOperand(0);
10918     if (isContractableFMUL(N10) &&
10919         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10920       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10921                          DAG.getNode(ISD::FNEG, SL, VT,
10922                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
10923                                                  N10.getOperand(0))),
10924                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10925                                      N10.getOperand(1)),
10926                          N0, Flags);
10927     }
10928   }
10929 
10930   // fold (fsub (fpext (fneg (fmul, x, y))), z)
10931   //   -> (fneg (fma (fpext x), (fpext y), z))
10932   // Note: This could be removed with appropriate canonicalization of the
10933   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10934   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10935   // from implementing the canonicalization in visitFSUB.
10936   if (N0.getOpcode() == ISD::FP_EXTEND) {
10937     SDValue N00 = N0.getOperand(0);
10938     if (N00.getOpcode() == ISD::FNEG) {
10939       SDValue N000 = N00.getOperand(0);
10940       if (isContractableFMUL(N000) &&
10941           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10942         return DAG.getNode(ISD::FNEG, SL, VT,
10943                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10944                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10945                                                    N000.getOperand(0)),
10946                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10947                                                    N000.getOperand(1)),
10948                                        N1, Flags));
10949       }
10950     }
10951   }
10952 
10953   // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
10955   // Note: This could be removed with appropriate canonicalization of the
10956   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
10957   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
10958   // from implementing the canonicalization in visitFSUB.
10959   if (N0.getOpcode() == ISD::FNEG) {
10960     SDValue N00 = N0.getOperand(0);
10961     if (N00.getOpcode() == ISD::FP_EXTEND) {
10962       SDValue N000 = N00.getOperand(0);
10963       if (isContractableFMUL(N000) &&
10964           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
10965         return DAG.getNode(ISD::FNEG, SL, VT,
10966                            DAG.getNode(PreferredFusedOpcode, SL, VT,
10967                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10968                                                    N000.getOperand(0)),
10969                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
10970                                                    N000.getOperand(1)),
10971                                        N1, Flags));
10972       }
10973     }
10974   }
10975 
10976   // More folding opportunities when target permits.
10977   if (Aggressive) {
10978     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
10980     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
10981         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
10982         N0.getOperand(2)->hasOneUse()) {
10983       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10984                          N0.getOperand(0), N0.getOperand(1),
10985                          DAG.getNode(PreferredFusedOpcode, SL, VT,
10986                                      N0.getOperand(2).getOperand(0),
10987                                      N0.getOperand(2).getOperand(1),
10988                                      DAG.getNode(ISD::FNEG, SL, VT,
10989                                                  N1), Flags), Flags);
10990     }
10991 
10992     // fold (fsub x, (fma y, z, (fmul u, v)))
10993     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
10994     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
10995         isContractableFMUL(N1.getOperand(2))) {
10996       SDValue N20 = N1.getOperand(2).getOperand(0);
10997       SDValue N21 = N1.getOperand(2).getOperand(1);
10998       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10999                          DAG.getNode(ISD::FNEG, SL, VT,
11000                                      N1.getOperand(0)),
11001                          N1.getOperand(1),
11002                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11003                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11004                                      N21, N0, Flags), Flags);
11005     }
11006 
11007 
11008     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
11010     if (N0.getOpcode() == PreferredFusedOpcode) {
11011       SDValue N02 = N0.getOperand(2);
11012       if (N02.getOpcode() == ISD::FP_EXTEND) {
11013         SDValue N020 = N02.getOperand(0);
11014         if (isContractableFMUL(N020) &&
11015             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11016           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11017                              N0.getOperand(0), N0.getOperand(1),
11018                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11019                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11020                                                      N020.getOperand(0)),
11021                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11022                                                      N020.getOperand(1)),
11023                                          DAG.getNode(ISD::FNEG, SL, VT,
11024                                                      N1), Flags), Flags);
11025         }
11026       }
11027     }
11028 
11029     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11030     //   -> (fma (fpext x), (fpext y),
11031     //           (fma (fpext u), (fpext v), (fneg z)))
11032     // FIXME: This turns two single-precision and one double-precision
11033     // operation into two double-precision operations, which might not be
11034     // interesting for all targets, especially GPUs.
11035     if (N0.getOpcode() == ISD::FP_EXTEND) {
11036       SDValue N00 = N0.getOperand(0);
11037       if (N00.getOpcode() == PreferredFusedOpcode) {
11038         SDValue N002 = N00.getOperand(2);
11039         if (isContractableFMUL(N002) &&
11040             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11041           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11042                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11043                                          N00.getOperand(0)),
11044                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11045                                          N00.getOperand(1)),
11046                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11047                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11048                                                      N002.getOperand(0)),
11049                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11050                                                      N002.getOperand(1)),
11051                                          DAG.getNode(ISD::FNEG, SL, VT,
11052                                                      N1), Flags), Flags);
11053         }
11054       }
11055     }
11056 
11057     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11058     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11059     if (N1.getOpcode() == PreferredFusedOpcode &&
11060         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11061       SDValue N120 = N1.getOperand(2).getOperand(0);
11062       if (isContractableFMUL(N120) &&
11063           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11064         SDValue N1200 = N120.getOperand(0);
11065         SDValue N1201 = N120.getOperand(1);
11066         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11067                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11068                            N1.getOperand(1),
11069                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11070                                        DAG.getNode(ISD::FNEG, SL, VT,
11071                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11072                                                                VT, N1200)),
11073                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11074                                                    N1201),
11075                                        N0, Flags), Flags);
11076       }
11077     }
11078 
11079     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11080     //   -> (fma (fneg (fpext y)), (fpext z),
11081     //           (fma (fneg (fpext u)), (fpext v), x))
11082     // FIXME: This turns two single-precision and one double-precision
11083     // operation into two double-precision operations, which might not be
11084     // interesting for all targets, especially GPUs.
11085     if (N1.getOpcode() == ISD::FP_EXTEND &&
11086         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11087       SDValue CvtSrc = N1.getOperand(0);
11088       SDValue N100 = CvtSrc.getOperand(0);
11089       SDValue N101 = CvtSrc.getOperand(1);
11090       SDValue N102 = CvtSrc.getOperand(2);
11091       if (isContractableFMUL(N102) &&
11092           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11093         SDValue N1020 = N102.getOperand(0);
11094         SDValue N1021 = N102.getOperand(1);
11095         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11096                            DAG.getNode(ISD::FNEG, SL, VT,
11097                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11098                                                    N100)),
11099                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11100                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11101                                        DAG.getNode(ISD::FNEG, SL, VT,
11102                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11103                                                                VT, N1020)),
11104                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11105                                                    N1021),
11106                                        N0, Flags), Flags);
11107       }
11108     }
11109   }
11110 
11111   return SDValue();
11112 }
11113 
11114 /// Try to perform FMA combining on a given FMUL node based on the distributive
11115 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11116 /// subtraction instead of addition).
11117 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11118   SDValue N0 = N->getOperand(0);
11119   SDValue N1 = N->getOperand(1);
11120   EVT VT = N->getValueType(0);
11121   SDLoc SL(N);
11122   const SDNodeFlags Flags = N->getFlags();
11123 
11124   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11125 
11126   const TargetOptions &Options = DAG.getTarget().Options;
11127 
11128   // The transforms below are incorrect when x == 0 and y == inf, because the
11129   // intermediate multiplication produces a nan.
11130   if (!Options.NoInfsFPMath)
11131     return SDValue();
11132 
11133   // Floating-point multiply-add without intermediate rounding.
11134   bool HasFMA =
11135       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11136       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11137       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11138 
11139   // Floating-point multiply-add with intermediate rounding. This can result
11140   // in a less precise result due to the changed rounding order.
11141   bool HasFMAD = Options.UnsafeFPMath &&
11142                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11143 
11144   // No valid opcode, do not combine.
11145   if (!HasFMAD && !HasFMA)
11146     return SDValue();
11147 
11148   // Always prefer FMAD to FMA for precision.
11149   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11150   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11151 
11152   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11153   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11154   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11155     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11156       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11157         if (C->isExactlyValue(+1.0))
11158           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11159                              Y, Flags);
11160         if (C->isExactlyValue(-1.0))
11161           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11162                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11163       }
11164     }
11165     return SDValue();
11166   };
11167 
11168   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11169     return FMA;
11170   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11171     return FMA;
11172 
11173   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11174   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11175   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11176   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11177   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11178     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11179       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11180         if (C0->isExactlyValue(+1.0))
11181           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11182                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11183                              Y, Flags);
11184         if (C0->isExactlyValue(-1.0))
11185           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11186                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11187                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11188       }
11189       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11190         if (C1->isExactlyValue(+1.0))
11191           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11192                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11193         if (C1->isExactlyValue(-1.0))
11194           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11195                              Y, Flags);
11196       }
11197     }
11198     return SDValue();
11199   };
11200 
11201   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11202     return FMA;
11203   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11204     return FMA;
11205 
11206   return SDValue();
11207 }
11208 
11209 SDValue DAGCombiner::visitFADD(SDNode *N) {
11210   SDValue N0 = N->getOperand(0);
11211   SDValue N1 = N->getOperand(1);
11212   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11213   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11214   EVT VT = N->getValueType(0);
11215   SDLoc DL(N);
11216   const TargetOptions &Options = DAG.getTarget().Options;
11217   const SDNodeFlags Flags = N->getFlags();
11218 
11219   // fold vector ops
11220   if (VT.isVector())
11221     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11222       return FoldedVOp;
11223 
11224   // fold (fadd c1, c2) -> c1 + c2
11225   if (N0CFP && N1CFP)
11226     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11227 
11228   // canonicalize constant to RHS
11229   if (N0CFP && !N1CFP)
11230     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11231 
11232   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11233   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11234   if (N1C && N1C->isZero())
11235     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11236       return N0;
11237 
11238   if (SDValue NewSel = foldBinOpIntoSelect(N))
11239     return NewSel;
11240 
11241   // fold (fadd A, (fneg B)) -> (fsub A, B)
11242   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11243       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
11244     return DAG.getNode(ISD::FSUB, DL, VT, N0,
11245                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11246 
11247   // fold (fadd (fneg A), B) -> (fsub B, A)
11248   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11249       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
11250     return DAG.getNode(ISD::FSUB, DL, VT, N1,
11251                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
11252 
11253   auto isFMulNegTwo = [](SDValue FMul) {
11254     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11255       return false;
11256     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11257     return C && C->isExactlyValue(-2.0);
11258   };
11259 
11260   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11261   if (isFMulNegTwo(N0)) {
11262     SDValue B = N0.getOperand(0);
11263     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11264     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11265   }
11266   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11267   if (isFMulNegTwo(N1)) {
11268     SDValue B = N1.getOperand(0);
11269     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11270     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11271   }
11272 
11273   // No FP constant should be created after legalization as Instruction
11274   // Selection pass has a hard time dealing with FP constants.
11275   bool AllowNewConst = (Level < AfterLegalizeDAG);
11276 
11277   // If 'unsafe math' or nnan is enabled, fold lots of things.
11278   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11279     // If allowed, fold (fadd (fneg x), x) -> 0.0
11280     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11281       return DAG.getConstantFP(0.0, DL, VT);
11282 
11283     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11284     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11285       return DAG.getConstantFP(0.0, DL, VT);
11286   }
11287 
11288   // If 'unsafe math' or reassoc and nsz, fold lots of things.
11289   // TODO: break out portions of the transformations below for which Unsafe is
11290   //       considered and which do not require both nsz and reassoc
11291   if ((Options.UnsafeFPMath ||
11292        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11293       AllowNewConst) {
11294     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11295     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11296         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11297       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11298       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11299     }
11300 
11301     // We can fold chains of FADD's of the same value into multiplications.
11302     // This transform is not safe in general because we are reducing the number
11303     // of rounding steps.
11304     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11305       if (N0.getOpcode() == ISD::FMUL) {
11306         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11307         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11308 
11309         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11310         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11311           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11312                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11313           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11314         }
11315 
11316         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11317         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11318             N1.getOperand(0) == N1.getOperand(1) &&
11319             N0.getOperand(0) == N1.getOperand(0)) {
11320           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11321                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11322           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11323         }
11324       }
11325 
11326       if (N1.getOpcode() == ISD::FMUL) {
11327         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11328         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11329 
11330         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11331         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11332           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11333                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11334           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11335         }
11336 
11337         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11338         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11339             N0.getOperand(0) == N0.getOperand(1) &&
11340             N1.getOperand(0) == N0.getOperand(0)) {
11341           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11342                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11343           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11344         }
11345       }
11346 
11347       if (N0.getOpcode() == ISD::FADD) {
11348         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11349         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11350         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11351             (N0.getOperand(0) == N1)) {
11352           return DAG.getNode(ISD::FMUL, DL, VT,
11353                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11354         }
11355       }
11356 
11357       if (N1.getOpcode() == ISD::FADD) {
11358         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11359         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11360         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11361             N1.getOperand(0) == N0) {
11362           return DAG.getNode(ISD::FMUL, DL, VT,
11363                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11364         }
11365       }
11366 
11367       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11368       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11369           N0.getOperand(0) == N0.getOperand(1) &&
11370           N1.getOperand(0) == N1.getOperand(1) &&
11371           N0.getOperand(0) == N1.getOperand(0)) {
11372         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11373                            DAG.getConstantFP(4.0, DL, VT), Flags);
11374       }
11375     }
11376   } // enable-unsafe-fp-math
11377 
11378   // FADD -> FMA combines:
11379   if (SDValue Fused = visitFADDForFMACombine(N)) {
11380     AddToWorklist(Fused.getNode());
11381     return Fused;
11382   }
11383   return SDValue();
11384 }
11385 
11386 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11387   SDValue N0 = N->getOperand(0);
11388   SDValue N1 = N->getOperand(1);
11389   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11390   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11391   EVT VT = N->getValueType(0);
11392   SDLoc DL(N);
11393   const TargetOptions &Options = DAG.getTarget().Options;
11394   const SDNodeFlags Flags = N->getFlags();
11395 
11396   // fold vector ops
11397   if (VT.isVector())
11398     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11399       return FoldedVOp;
11400 
11401   // fold (fsub c1, c2) -> c1-c2
11402   if (N0CFP && N1CFP)
11403     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11404 
11405   if (SDValue NewSel = foldBinOpIntoSelect(N))
11406     return NewSel;
11407 
11408   // (fsub A, 0) -> A
11409   if (N1CFP && N1CFP->isZero()) {
11410     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11411         Flags.hasNoSignedZeros()) {
11412       return N0;
11413     }
11414   }
11415 
11416   if (N0 == N1) {
11417     // (fsub x, x) -> 0.0
11418     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11419       return DAG.getConstantFP(0.0f, DL, VT);
11420   }
11421 
11422   // (fsub -0.0, N1) -> -N1
11423   if (N0CFP && N0CFP->isZero()) {
11424     if (N0CFP->isNegative() ||
11425         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11426       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11427         return GetNegatedExpression(N1, DAG, LegalOperations);
11428       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11429         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11430     }
11431   }
11432 
11433   if ((Options.UnsafeFPMath ||
11434       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11435       && N1.getOpcode() == ISD::FADD) {
11436     // X - (X + Y) -> -Y
11437     if (N0 == N1->getOperand(0))
11438       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11439     // X - (Y + X) -> -Y
11440     if (N0 == N1->getOperand(1))
11441       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11442   }
11443 
11444   // fold (fsub A, (fneg B)) -> (fadd A, B)
11445   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
11446     return DAG.getNode(ISD::FADD, DL, VT, N0,
11447                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
11448 
11449   // FSUB -> FMA combines:
11450   if (SDValue Fused = visitFSUBForFMACombine(N)) {
11451     AddToWorklist(Fused.getNode());
11452     return Fused;
11453   }
11454 
11455   return SDValue();
11456 }
11457 
11458 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11459   SDValue N0 = N->getOperand(0);
11460   SDValue N1 = N->getOperand(1);
11461   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11462   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11463   EVT VT = N->getValueType(0);
11464   SDLoc DL(N);
11465   const TargetOptions &Options = DAG.getTarget().Options;
11466   const SDNodeFlags Flags = N->getFlags();
11467 
11468   // fold vector ops
11469   if (VT.isVector()) {
11470     // This just handles C1 * C2 for vectors. Other vector folds are below.
11471     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11472       return FoldedVOp;
11473   }
11474 
11475   // fold (fmul c1, c2) -> c1*c2
11476   if (N0CFP && N1CFP)
11477     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11478 
11479   // canonicalize constant to RHS
11480   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11481      !isConstantFPBuildVectorOrConstantFP(N1))
11482     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11483 
11484   // fold (fmul A, 1.0) -> A
11485   if (N1CFP && N1CFP->isExactlyValue(1.0))
11486     return N0;
11487 
11488   if (SDValue NewSel = foldBinOpIntoSelect(N))
11489     return NewSel;
11490 
11491   if (Options.UnsafeFPMath ||
11492       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11493     // fold (fmul A, 0) -> 0
11494     if (N1CFP && N1CFP->isZero())
11495       return N1;
11496   }
11497 
11498   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11499     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11500     if (isConstantFPBuildVectorOrConstantFP(N1) &&
11501         N0.getOpcode() == ISD::FMUL) {
11502       SDValue N00 = N0.getOperand(0);
11503       SDValue N01 = N0.getOperand(1);
11504       // Avoid an infinite loop by making sure that N00 is not a constant
11505       // (the inner multiply has not been constant folded yet).
11506       if (isConstantFPBuildVectorOrConstantFP(N01) &&
11507           !isConstantFPBuildVectorOrConstantFP(N00)) {
11508         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11509         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11510       }
11511     }
11512 
11513     // Match a special-case: we convert X * 2.0 into fadd.
11514     // fmul (fadd X, X), C -> fmul X, 2.0 * C
11515     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11516         N0.getOperand(0) == N0.getOperand(1)) {
11517       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11518       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11519       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11520     }
11521   }
11522 
11523   // fold (fmul X, 2.0) -> (fadd X, X)
11524   if (N1CFP && N1CFP->isExactlyValue(+2.0))
11525     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11526 
11527   // fold (fmul X, -1.0) -> (fneg X)
11528   if (N1CFP && N1CFP->isExactlyValue(-1.0))
11529     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11530       return DAG.getNode(ISD::FNEG, DL, VT, N0);
11531 
11532   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11533   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11534     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11535       // Both can be negated for free, check to see if at least one is cheaper
11536       // negated.
11537       if (LHSNeg == 2 || RHSNeg == 2)
11538         return DAG.getNode(ISD::FMUL, DL, VT,
11539                            GetNegatedExpression(N0, DAG, LegalOperations),
11540                            GetNegatedExpression(N1, DAG, LegalOperations),
11541                            Flags);
11542     }
11543   }
11544 
11545   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11546   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11547   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11548       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11549       TLI.isOperationLegal(ISD::FABS, VT)) {
11550     SDValue Select = N0, X = N1;
11551     if (Select.getOpcode() != ISD::SELECT)
11552       std::swap(Select, X);
11553 
11554     SDValue Cond = Select.getOperand(0);
11555     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11556     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11557 
11558     if (TrueOpnd && FalseOpnd &&
11559         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11560         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11561         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11562       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11563       switch (CC) {
11564       default: break;
11565       case ISD::SETOLT:
11566       case ISD::SETULT:
11567       case ISD::SETOLE:
11568       case ISD::SETULE:
11569       case ISD::SETLT:
11570       case ISD::SETLE:
11571         std::swap(TrueOpnd, FalseOpnd);
11572         LLVM_FALLTHROUGH;
11573       case ISD::SETOGT:
11574       case ISD::SETUGT:
11575       case ISD::SETOGE:
11576       case ISD::SETUGE:
11577       case ISD::SETGT:
11578       case ISD::SETGE:
11579         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11580             TLI.isOperationLegal(ISD::FNEG, VT))
11581           return DAG.getNode(ISD::FNEG, DL, VT,
11582                    DAG.getNode(ISD::FABS, DL, VT, X));
11583         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11584           return DAG.getNode(ISD::FABS, DL, VT, X);
11585 
11586         break;
11587       }
11588     }
11589   }
11590 
11591   // FMUL -> FMA combines:
11592   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11593     AddToWorklist(Fused.getNode());
11594     return Fused;
11595   }
11596 
11597   return SDValue();
11598 }
11599 
11600 SDValue DAGCombiner::visitFMA(SDNode *N) {
11601   SDValue N0 = N->getOperand(0);
11602   SDValue N1 = N->getOperand(1);
11603   SDValue N2 = N->getOperand(2);
11604   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11605   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11606   EVT VT = N->getValueType(0);
11607   SDLoc DL(N);
11608   const TargetOptions &Options = DAG.getTarget().Options;
11609 
11610   // FMA nodes have flags that propagate to the created nodes.
11611   const SDNodeFlags Flags = N->getFlags();
11612   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11613 
11614   // Constant fold FMA.
11615   if (isa<ConstantFPSDNode>(N0) &&
11616       isa<ConstantFPSDNode>(N1) &&
11617       isa<ConstantFPSDNode>(N2)) {
11618     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11619   }
11620 
11621   if (UnsafeFPMath) {
11622     if (N0CFP && N0CFP->isZero())
11623       return N2;
11624     if (N1CFP && N1CFP->isZero())
11625       return N2;
11626   }
11627   // TODO: The FMA node should have flags that propagate to these nodes.
11628   if (N0CFP && N0CFP->isExactlyValue(1.0))
11629     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11630   if (N1CFP && N1CFP->isExactlyValue(1.0))
11631     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11632 
11633   // Canonicalize (fma c, x, y) -> (fma x, c, y)
11634   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11635      !isConstantFPBuildVectorOrConstantFP(N1))
11636     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11637 
11638   if (UnsafeFPMath) {
11639     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11640     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11641         isConstantFPBuildVectorOrConstantFP(N1) &&
11642         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11643       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11644                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11645                                      Flags), Flags);
11646     }
11647 
11648     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11649     if (N0.getOpcode() == ISD::FMUL &&
11650         isConstantFPBuildVectorOrConstantFP(N1) &&
11651         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11652       return DAG.getNode(ISD::FMA, DL, VT,
11653                          N0.getOperand(0),
11654                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11655                                      Flags),
11656                          N2);
11657     }
11658   }
11659 
11660   // (fma x, 1, y) -> (fadd x, y)
11661   // (fma x, -1, y) -> (fadd (fneg x), y)
11662   if (N1CFP) {
11663     if (N1CFP->isExactlyValue(1.0))
11664       // TODO: The FMA node should have flags that propagate to this node.
11665       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11666 
11667     if (N1CFP->isExactlyValue(-1.0) &&
11668         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11669       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11670       AddToWorklist(RHSNeg.getNode());
11671       // TODO: The FMA node should have flags that propagate to this node.
11672       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11673     }
11674 
11675     // fma (fneg x), K, y -> fma x -K, y
11676     if (N0.getOpcode() == ISD::FNEG &&
11677         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11678          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
11679       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11680                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11681     }
11682   }
11683 
11684   if (UnsafeFPMath) {
11685     // (fma x, c, x) -> (fmul x, (c+1))
11686     if (N1CFP && N0 == N2) {
11687       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11688                          DAG.getNode(ISD::FADD, DL, VT, N1,
11689                                      DAG.getConstantFP(1.0, DL, VT), Flags),
11690                          Flags);
11691     }
11692 
11693     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11694     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11695       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11696                          DAG.getNode(ISD::FADD, DL, VT, N1,
11697                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
11698                          Flags);
11699     }
11700   }
11701 
11702   return SDValue();
11703 }
11704 
11705 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11706 // reciprocal.
11707 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11708 // Notice that this is not always beneficial. One reason is different targets
11709 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11710 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11711 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11712 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11713   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11714   const SDNodeFlags Flags = N->getFlags();
11715   if (!UnsafeMath && !Flags.hasAllowReciprocal())
11716     return SDValue();
11717 
11718   // Skip if current node is a reciprocal.
11719   SDValue N0 = N->getOperand(0);
11720   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11721   if (N0CFP && N0CFP->isExactlyValue(1.0))
11722     return SDValue();
11723 
11724   // Exit early if the target does not want this transform or if there can't
11725   // possibly be enough uses of the divisor to make the transform worthwhile.
11726   SDValue N1 = N->getOperand(1);
11727   unsigned MinUses = TLI.combineRepeatedFPDivisors();
11728   if (!MinUses || N1->use_size() < MinUses)
11729     return SDValue();
11730 
11731   // Find all FDIV users of the same divisor.
11732   // Use a set because duplicates may be present in the user list.
11733   SetVector<SDNode *> Users;
11734   for (auto *U : N1->uses()) {
11735     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11736       // This division is eligible for optimization only if global unsafe math
11737       // is enabled or if this division allows reciprocal formation.
11738       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11739         Users.insert(U);
11740     }
11741   }
11742 
11743   // Now that we have the actual number of divisor uses, make sure it meets
11744   // the minimum threshold specified by the target.
11745   if (Users.size() < MinUses)
11746     return SDValue();
11747 
11748   EVT VT = N->getValueType(0);
11749   SDLoc DL(N);
11750   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11751   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11752 
11753   // Dividend / Divisor -> Dividend * Reciprocal
11754   for (auto *U : Users) {
11755     SDValue Dividend = U->getOperand(0);
11756     if (Dividend != FPOne) {
11757       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11758                                     Reciprocal, Flags);
11759       CombineTo(U, NewNode);
11760     } else if (U != Reciprocal.getNode()) {
11761       // In the absence of fast-math-flags, this user node is always the
11762       // same node as Reciprocal, but with FMF they may be different nodes.
11763       CombineTo(U, Reciprocal);
11764     }
11765   }
11766   return SDValue(N, 0);  // N was replaced.
11767 }
11768 
11769 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11770   SDValue N0 = N->getOperand(0);
11771   SDValue N1 = N->getOperand(1);
11772   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11773   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11774   EVT VT = N->getValueType(0);
11775   SDLoc DL(N);
11776   const TargetOptions &Options = DAG.getTarget().Options;
11777   SDNodeFlags Flags = N->getFlags();
11778 
11779   // fold vector ops
11780   if (VT.isVector())
11781     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11782       return FoldedVOp;
11783 
11784   // fold (fdiv c1, c2) -> c1/c2
11785   if (N0CFP && N1CFP)
11786     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
11787 
11788   if (SDValue NewSel = foldBinOpIntoSelect(N))
11789     return NewSel;
11790 
11791   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
11792     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
11793     if (N1CFP) {
11794       // Compute the reciprocal 1.0 / c2.
11795       const APFloat &N1APF = N1CFP->getValueAPF();
11796       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
11797       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
11798       // Only do the transform if the reciprocal is a legal fp immediate that
11799       // isn't too nasty (eg NaN, denormal, ...).
11800       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
11801           (!LegalOperations ||
11802            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
11803            // backend)... we should handle this gracefully after Legalize.
11804            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
11805            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11806            TLI.isFPImmLegal(Recip, VT)))
11807         return DAG.getNode(ISD::FMUL, DL, VT, N0,
11808                            DAG.getConstantFP(Recip, DL, VT), Flags);
11809     }
11810 
11811     // If this FDIV is part of a reciprocal square root, it may be folded
11812     // into a target-specific square root estimate instruction.
11813     if (N1.getOpcode() == ISD::FSQRT) {
11814       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
11815         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11816       }
11817     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
11818                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11819       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11820                                           Flags)) {
11821         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
11822         AddToWorklist(RV.getNode());
11823         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11824       }
11825     } else if (N1.getOpcode() == ISD::FP_ROUND &&
11826                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11827       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
11828                                           Flags)) {
11829         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
11830         AddToWorklist(RV.getNode());
11831         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11832       }
11833     } else if (N1.getOpcode() == ISD::FMUL) {
11834       // Look through an FMUL. Even though this won't remove the FDIV directly,
11835       // it's still worthwhile to get rid of the FSQRT if possible.
11836       SDValue SqrtOp;
11837       SDValue OtherOp;
11838       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
11839         SqrtOp = N1.getOperand(0);
11840         OtherOp = N1.getOperand(1);
11841       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
11842         SqrtOp = N1.getOperand(1);
11843         OtherOp = N1.getOperand(0);
11844       }
11845       if (SqrtOp.getNode()) {
11846         // We found a FSQRT, so try to make this fold:
11847         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
11848         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
11849           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
11850           AddToWorklist(RV.getNode());
11851           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11852         }
11853       }
11854     }
11855 
11856     // Fold into a reciprocal estimate and multiply instead of a real divide.
11857     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
11858       AddToWorklist(RV.getNode());
11859       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
11860     }
11861   }
11862 
11863   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
11864   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
11865     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
11866       // Both can be negated for free, check to see if at least one is cheaper
11867       // negated.
11868       if (LHSNeg == 2 || RHSNeg == 2)
11869         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
11870                            GetNegatedExpression(N0, DAG, LegalOperations),
11871                            GetNegatedExpression(N1, DAG, LegalOperations),
11872                            Flags);
11873     }
11874   }
11875 
11876   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
11877     return CombineRepeatedDivisors;
11878 
11879   return SDValue();
11880 }
11881 
11882 SDValue DAGCombiner::visitFREM(SDNode *N) {
11883   SDValue N0 = N->getOperand(0);
11884   SDValue N1 = N->getOperand(1);
11885   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11886   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11887   EVT VT = N->getValueType(0);
11888 
11889   // fold (frem c1, c2) -> fmod(c1,c2)
11890   if (N0CFP && N1CFP)
11891     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
11892 
11893   if (SDValue NewSel = foldBinOpIntoSelect(N))
11894     return NewSel;
11895 
11896   return SDValue();
11897 }
11898 
11899 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
11900   SDNodeFlags Flags = N->getFlags();
11901   if (!DAG.getTarget().Options.UnsafeFPMath &&
11902       !Flags.hasApproximateFuncs())
11903     return SDValue();
11904 
11905   SDValue N0 = N->getOperand(0);
11906   if (TLI.isFsqrtCheap(N0, DAG))
11907     return SDValue();
11908 
11909   // FSQRT nodes have flags that propagate to the created nodes.
11910   return buildSqrtEstimate(N0, Flags);
11911 }
11912 
11913 /// copysign(x, fp_extend(y)) -> copysign(x, y)
11914 /// copysign(x, fp_round(y)) -> copysign(x, y)
11915 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
11916   SDValue N1 = N->getOperand(1);
11917   if ((N1.getOpcode() == ISD::FP_EXTEND ||
11918        N1.getOpcode() == ISD::FP_ROUND)) {
11919     // Do not optimize out type conversion of f128 type yet.
11920     // For some targets like x86_64, configuration is changed to keep one f128
11921     // value in one SSE register, but instruction selection cannot handle
11922     // FCOPYSIGN on SSE registers yet.
11923     EVT N1VT = N1->getValueType(0);
11924     EVT N1Op0VT = N1->getOperand(0).getValueType();
11925     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
11926   }
11927   return false;
11928 }
11929 
11930 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
11931   SDValue N0 = N->getOperand(0);
11932   SDValue N1 = N->getOperand(1);
11933   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11934   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11935   EVT VT = N->getValueType(0);
11936 
11937   if (N0CFP && N1CFP) // Constant fold
11938     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
11939 
11940   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
11941     const APFloat &V = N1C->getValueAPF();
11942     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
11943     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
11944     if (!V.isNegative()) {
11945       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
11946         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11947     } else {
11948       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11949         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
11950                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
11951     }
11952   }
11953 
11954   // copysign(fabs(x), y) -> copysign(x, y)
11955   // copysign(fneg(x), y) -> copysign(x, y)
11956   // copysign(copysign(x,z), y) -> copysign(x, y)
11957   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
11958       N0.getOpcode() == ISD::FCOPYSIGN)
11959     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
11960 
11961   // copysign(x, abs(y)) -> abs(x)
11962   if (N1.getOpcode() == ISD::FABS)
11963     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
11964 
11965   // copysign(x, copysign(y,z)) -> copysign(x, z)
11966   if (N1.getOpcode() == ISD::FCOPYSIGN)
11967     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
11968 
11969   // copysign(x, fp_extend(y)) -> copysign(x, y)
11970   // copysign(x, fp_round(y)) -> copysign(x, y)
11971   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
11972     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
11973 
11974   return SDValue();
11975 }
11976 
11977 SDValue DAGCombiner::visitFPOW(SDNode *N) {
11978   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
11979   if (!ExponentC)
11980     return SDValue();
11981 
11982   // Try to convert x ** (1/3) into cube root.
11983   // TODO: Handle the various flavors of long double.
11984   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
11985   //       Some range near 1/3 should be fine.
11986   EVT VT = N->getValueType(0);
11987   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
11988       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
11989     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
11990     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
11991     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
11992     // For regular numbers, rounding may cause the results to differ.
11993     // Therefore, we require { nsz ninf nnan afn } for this transform.
11994     // TODO: We could select out the special cases if we don't have nsz/ninf.
11995     SDNodeFlags Flags = N->getFlags();
11996     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
11997         !Flags.hasApproximateFuncs())
11998       return SDValue();
11999 
12000     // Do not create a cbrt() libcall if the target does not have it, and do not
12001     // turn a pow that has lowering support into a cbrt() libcall.
12002     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12003         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12004          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12005       return SDValue();
12006 
12007     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12008   }
12009 
12010   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12011   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12012   // TODO: This could be extended (using a target hook) to handle smaller
12013   // power-of-2 fractional exponents.
12014   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12015   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12016   if (ExponentIs025 || ExponentIs075) {
12017     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12018     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12019     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12020     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12021     // For regular numbers, rounding may cause the results to differ.
12022     // Therefore, we require { nsz ninf afn } for this transform.
12023     // TODO: We could select out the special cases if we don't have nsz/ninf.
12024     SDNodeFlags Flags = N->getFlags();
12025 
12026     // We only need no signed zeros for the 0.25 case.
12027     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12028         !Flags.hasApproximateFuncs())
12029       return SDValue();
12030 
12031     // Don't double the number of libcalls. We are trying to inline fast code.
12032     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12033       return SDValue();
12034 
12035     // Assume that libcalls are the smallest code.
12036     // TODO: This restriction should probably be lifted for vectors.
12037     if (DAG.getMachineFunction().getFunction().optForSize())
12038       return SDValue();
12039 
12040     // pow(X, 0.25) --> sqrt(sqrt(X))
12041     SDLoc DL(N);
12042     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12043     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12044     if (ExponentIs025)
12045       return SqrtSqrt;
12046     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12047     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12048   }
12049 
12050   return SDValue();
12051 }
12052 
12053 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12054                                const TargetLowering &TLI) {
12055   // This optimization is guarded by a function attribute because it may produce
12056   // unexpected results. Ie, programs may be relying on the platform-specific
12057   // undefined behavior when the float-to-int conversion overflows.
12058   const Function &F = DAG.getMachineFunction().getFunction();
12059   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12060   if (StrictOverflow.getValueAsString().equals("false"))
12061     return SDValue();
12062 
12063   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12064   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12065   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12066   // conversions would return +0.0.
12067   // FIXME: We should be able to use node-level FMF here.
12068   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12069   EVT VT = N->getValueType(0);
12070   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12071       !DAG.getTarget().Options.NoSignedZerosFPMath)
12072     return SDValue();
12073 
12074   // fptosi/fptoui round towards zero, so converting from FP to integer and
12075   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12076   SDValue N0 = N->getOperand(0);
12077   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12078       N0.getOperand(0).getValueType() == VT)
12079     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12080 
12081   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12082       N0.getOperand(0).getValueType() == VT)
12083     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12084 
12085   return SDValue();
12086 }
12087 
12088 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12089   SDValue N0 = N->getOperand(0);
12090   EVT VT = N->getValueType(0);
12091   EVT OpVT = N0.getValueType();
12092 
12093   // fold (sint_to_fp c1) -> c1fp
12094   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12095       // ...but only if the target supports immediate floating-point values
12096       (!LegalOperations ||
12097        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12098     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12099 
12100   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12101   // but UINT_TO_FP is legal on this target, try to convert.
12102   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12103       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12104     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12105     if (DAG.SignBitIsZero(N0))
12106       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12107   }
12108 
12109   // The next optimizations are desirable only if SELECT_CC can be lowered.
12110   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12111     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12112     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12113         !VT.isVector() &&
12114         (!LegalOperations ||
12115          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12116       SDLoc DL(N);
12117       SDValue Ops[] =
12118         { N0.getOperand(0), N0.getOperand(1),
12119           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12120           N0.getOperand(2) };
12121       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12122     }
12123 
12124     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12125     //      (select_cc x, y, 1.0, 0.0,, cc)
12126     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12127         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12128         (!LegalOperations ||
12129          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12130       SDLoc DL(N);
12131       SDValue Ops[] =
12132         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12133           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12134           N0.getOperand(0).getOperand(2) };
12135       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12136     }
12137   }
12138 
12139   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12140     return FTrunc;
12141 
12142   return SDValue();
12143 }
12144 
12145 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12146   SDValue N0 = N->getOperand(0);
12147   EVT VT = N->getValueType(0);
12148   EVT OpVT = N0.getValueType();
12149 
12150   // fold (uint_to_fp c1) -> c1fp
12151   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12152       // ...but only if the target supports immediate floating-point values
12153       (!LegalOperations ||
12154        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12155     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12156 
12157   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12158   // but SINT_TO_FP is legal on this target, try to convert.
12159   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12160       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12161     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12162     if (DAG.SignBitIsZero(N0))
12163       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12164   }
12165 
12166   // The next optimizations are desirable only if SELECT_CC can be lowered.
12167   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12168     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12169     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12170         (!LegalOperations ||
12171          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12172       SDLoc DL(N);
12173       SDValue Ops[] =
12174         { N0.getOperand(0), N0.getOperand(1),
12175           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12176           N0.getOperand(2) };
12177       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12178     }
12179   }
12180 
12181   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12182     return FTrunc;
12183 
12184   return SDValue();
12185 }
12186 
12187 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12188 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12189   SDValue N0 = N->getOperand(0);
12190   EVT VT = N->getValueType(0);
12191 
12192   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12193     return SDValue();
12194 
12195   SDValue Src = N0.getOperand(0);
12196   EVT SrcVT = Src.getValueType();
12197   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12198   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12199 
12200   // We can safely assume the conversion won't overflow the output range,
12201   // because (for example) (uint8_t)18293.f is undefined behavior.
12202 
12203   // Since we can assume the conversion won't overflow, our decision as to
12204   // whether the input will fit in the float should depend on the minimum
12205   // of the input range and output range.
12206 
12207   // This means this is also safe for a signed input and unsigned output, since
12208   // a negative input would lead to undefined behavior.
12209   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12210   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12211   unsigned ActualSize = std::min(InputSize, OutputSize);
12212   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12213 
12214   // We can only fold away the float conversion if the input range can be
12215   // represented exactly in the float range.
12216   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12217     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12218       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12219                                                        : ISD::ZERO_EXTEND;
12220       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12221     }
12222     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12223       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12224     return DAG.getBitcast(VT, Src);
12225   }
12226   return SDValue();
12227 }
12228 
12229 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12230   SDValue N0 = N->getOperand(0);
12231   EVT VT = N->getValueType(0);
12232 
12233   // fold (fp_to_sint c1fp) -> c1
12234   if (isConstantFPBuildVectorOrConstantFP(N0))
12235     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12236 
12237   return FoldIntToFPToInt(N, DAG);
12238 }
12239 
12240 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12241   SDValue N0 = N->getOperand(0);
12242   EVT VT = N->getValueType(0);
12243 
12244   // fold (fp_to_uint c1fp) -> c1
12245   if (isConstantFPBuildVectorOrConstantFP(N0))
12246     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12247 
12248   return FoldIntToFPToInt(N, DAG);
12249 }
12250 
12251 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12252   SDValue N0 = N->getOperand(0);
12253   SDValue N1 = N->getOperand(1);
12254   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12255   EVT VT = N->getValueType(0);
12256 
12257   // fold (fp_round c1fp) -> c1fp
12258   if (N0CFP)
12259     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12260 
12261   // fold (fp_round (fp_extend x)) -> x
12262   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12263     return N0.getOperand(0);
12264 
12265   // fold (fp_round (fp_round x)) -> (fp_round x)
12266   if (N0.getOpcode() == ISD::FP_ROUND) {
12267     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12268     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12269 
12270     // Skip this folding if it results in an fp_round from f80 to f16.
12271     //
12272     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12273     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12274     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12275     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12276     // x86.
12277     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12278       return SDValue();
12279 
12280     // If the first fp_round isn't a value preserving truncation, it might
12281     // introduce a tie in the second fp_round, that wouldn't occur in the
12282     // single-step fp_round we want to fold to.
12283     // In other words, double rounding isn't the same as rounding.
12284     // Also, this is a value preserving truncation iff both fp_round's are.
12285     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12286       SDLoc DL(N);
12287       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12288                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12289     }
12290   }
12291 
12292   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12293   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12294     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12295                               N0.getOperand(0), N1);
12296     AddToWorklist(Tmp.getNode());
12297     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12298                        Tmp, N0.getOperand(1));
12299   }
12300 
12301   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12302     return NewVSel;
12303 
12304   return SDValue();
12305 }
12306 
12307 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12308   SDValue N0 = N->getOperand(0);
12309   EVT VT = N->getValueType(0);
12310   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12311   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12312 
12313   // fold (fp_round_inreg c1fp) -> c1fp
12314   if (N0CFP && isTypeLegal(EVT)) {
12315     SDLoc DL(N);
12316     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12317     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12318   }
12319 
12320   return SDValue();
12321 }
12322 
12323 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12324   SDValue N0 = N->getOperand(0);
12325   EVT VT = N->getValueType(0);
12326 
12327   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
12328   if (N->hasOneUse() &&
12329       N->use_begin()->getOpcode() == ISD::FP_ROUND)
12330     return SDValue();
12331 
12332   // fold (fp_extend c1fp) -> c1fp
12333   if (isConstantFPBuildVectorOrConstantFP(N0))
12334     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12335 
12336   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12337   if (N0.getOpcode() == ISD::FP16_TO_FP &&
12338       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12339     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12340 
12341   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
12342   // value of X.
12343   if (N0.getOpcode() == ISD::FP_ROUND
12344       && N0.getConstantOperandVal(1) == 1) {
12345     SDValue In = N0.getOperand(0);
12346     if (In.getValueType() == VT) return In;
12347     if (VT.bitsLT(In.getValueType()))
12348       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12349                          In, N0.getOperand(1));
12350     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12351   }
12352 
12353   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12354   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12355        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12356     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12357     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12358                                      LN0->getChain(),
12359                                      LN0->getBasePtr(), N0.getValueType(),
12360                                      LN0->getMemOperand());
12361     CombineTo(N, ExtLoad);
12362     CombineTo(N0.getNode(),
12363               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12364                           N0.getValueType(), ExtLoad,
12365                           DAG.getIntPtrConstant(1, SDLoc(N0))),
12366               ExtLoad.getValue(1));
12367     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12368   }
12369 
12370   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12371     return NewVSel;
12372 
12373   return SDValue();
12374 }
12375 
12376 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12377   SDValue N0 = N->getOperand(0);
12378   EVT VT = N->getValueType(0);
12379 
12380   // fold (fceil c1) -> fceil(c1)
12381   if (isConstantFPBuildVectorOrConstantFP(N0))
12382     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12383 
12384   return SDValue();
12385 }
12386 
12387 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12388   SDValue N0 = N->getOperand(0);
12389   EVT VT = N->getValueType(0);
12390 
12391   // fold (ftrunc c1) -> ftrunc(c1)
12392   if (isConstantFPBuildVectorOrConstantFP(N0))
12393     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12394 
12395   // fold ftrunc (known rounded int x) -> x
12396   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12397   // likely to be generated to extract integer from a rounded floating value.
12398   switch (N0.getOpcode()) {
12399   default: break;
12400   case ISD::FRINT:
12401   case ISD::FTRUNC:
12402   case ISD::FNEARBYINT:
12403   case ISD::FFLOOR:
12404   case ISD::FCEIL:
12405     return N0;
12406   }
12407 
12408   return SDValue();
12409 }
12410 
12411 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12412   SDValue N0 = N->getOperand(0);
12413   EVT VT = N->getValueType(0);
12414 
12415   // fold (ffloor c1) -> ffloor(c1)
12416   if (isConstantFPBuildVectorOrConstantFP(N0))
12417     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12418 
12419   return SDValue();
12420 }
12421 
12422 // FIXME: FNEG and FABS have a lot in common; refactor.
12423 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12424   SDValue N0 = N->getOperand(0);
12425   EVT VT = N->getValueType(0);
12426 
12427   // Constant fold FNEG.
12428   if (isConstantFPBuildVectorOrConstantFP(N0))
12429     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12430 
12431   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12432                          &DAG.getTarget().Options))
12433     return GetNegatedExpression(N0, DAG, LegalOperations);
12434 
12435   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12436   // constant pool values.
12437   if (!TLI.isFNegFree(VT) &&
12438       N0.getOpcode() == ISD::BITCAST &&
12439       N0.getNode()->hasOneUse()) {
12440     SDValue Int = N0.getOperand(0);
12441     EVT IntVT = Int.getValueType();
12442     if (IntVT.isInteger() && !IntVT.isVector()) {
12443       APInt SignMask;
12444       if (N0.getValueType().isVector()) {
12445         // For a vector, get a mask such as 0x80... per scalar element
12446         // and splat it.
12447         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12448         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12449       } else {
12450         // For a scalar, just generate 0x80...
12451         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12452       }
12453       SDLoc DL0(N0);
12454       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12455                         DAG.getConstant(SignMask, DL0, IntVT));
12456       AddToWorklist(Int.getNode());
12457       return DAG.getBitcast(VT, Int);
12458     }
12459   }
12460 
12461   // (fneg (fmul c, x)) -> (fmul -c, x)
12462   if (N0.getOpcode() == ISD::FMUL &&
12463       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12464     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12465     if (CFP1) {
12466       APFloat CVal = CFP1->getValueAPF();
12467       CVal.changeSign();
12468       if (Level >= AfterLegalizeDAG &&
12469           (TLI.isFPImmLegal(CVal, VT) ||
12470            TLI.isOperationLegal(ISD::ConstantFP, VT)))
12471         return DAG.getNode(
12472             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12473             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12474             N0->getFlags());
12475     }
12476   }
12477 
12478   return SDValue();
12479 }
12480 
12481 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12482                             APFloat (*Op)(const APFloat &, const APFloat &)) {
12483   SDValue N0 = N->getOperand(0);
12484   SDValue N1 = N->getOperand(1);
12485   EVT VT = N->getValueType(0);
12486   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12487   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12488 
12489   if (N0CFP && N1CFP) {
12490     const APFloat &C0 = N0CFP->getValueAPF();
12491     const APFloat &C1 = N1CFP->getValueAPF();
12492     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12493   }
12494 
12495   // Canonicalize to constant on RHS.
12496   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12497       !isConstantFPBuildVectorOrConstantFP(N1))
12498     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12499 
12500   return SDValue();
12501 }
12502 
12503 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12504   return visitFMinMax(DAG, N, minnum);
12505 }
12506 
12507 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12508   return visitFMinMax(DAG, N, maxnum);
12509 }
12510 
12511 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12512   return visitFMinMax(DAG, N, minimum);
12513 }
12514 
12515 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12516   return visitFMinMax(DAG, N, maximum);
12517 }
12518 
12519 SDValue DAGCombiner::visitFABS(SDNode *N) {
12520   SDValue N0 = N->getOperand(0);
12521   EVT VT = N->getValueType(0);
12522 
12523   // fold (fabs c1) -> fabs(c1)
12524   if (isConstantFPBuildVectorOrConstantFP(N0))
12525     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12526 
12527   // fold (fabs (fabs x)) -> (fabs x)
12528   if (N0.getOpcode() == ISD::FABS)
12529     return N->getOperand(0);
12530 
12531   // fold (fabs (fneg x)) -> (fabs x)
12532   // fold (fabs (fcopysign x, y)) -> (fabs x)
12533   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12534     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12535 
12536   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12537   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12538     SDValue Int = N0.getOperand(0);
12539     EVT IntVT = Int.getValueType();
12540     if (IntVT.isInteger() && !IntVT.isVector()) {
12541       APInt SignMask;
12542       if (N0.getValueType().isVector()) {
12543         // For a vector, get a mask such as 0x7f... per scalar element
12544         // and splat it.
12545         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12546         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12547       } else {
12548         // For a scalar, just generate 0x7f...
12549         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12550       }
12551       SDLoc DL(N0);
12552       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12553                         DAG.getConstant(SignMask, DL, IntVT));
12554       AddToWorklist(Int.getNode());
12555       return DAG.getBitcast(N->getValueType(0), Int);
12556     }
12557   }
12558 
12559   return SDValue();
12560 }
12561 
12562 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12563   SDValue Chain = N->getOperand(0);
12564   SDValue N1 = N->getOperand(1);
12565   SDValue N2 = N->getOperand(2);
12566 
12567   // If N is a constant we could fold this into a fallthrough or unconditional
12568   // branch. However that doesn't happen very often in normal code, because
12569   // Instcombine/SimplifyCFG should have handled the available opportunities.
12570   // If we did this folding here, it would be necessary to update the
12571   // MachineBasicBlock CFG, which is awkward.
12572 
12573   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12574   // on the target.
12575   if (N1.getOpcode() == ISD::SETCC &&
12576       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12577                                    N1.getOperand(0).getValueType())) {
12578     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12579                        Chain, N1.getOperand(2),
12580                        N1.getOperand(0), N1.getOperand(1), N2);
12581   }
12582 
12583   if (N1.hasOneUse()) {
12584     if (SDValue NewN1 = rebuildSetCC(N1))
12585       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12586   }
12587 
12588   return SDValue();
12589 }
12590 
12591 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12592   if (N.getOpcode() == ISD::SRL ||
12593       (N.getOpcode() == ISD::TRUNCATE &&
12594        (N.getOperand(0).hasOneUse() &&
12595         N.getOperand(0).getOpcode() == ISD::SRL))) {
12596     // Look pass the truncate.
12597     if (N.getOpcode() == ISD::TRUNCATE)
12598       N = N.getOperand(0);
12599 
12600     // Match this pattern so that we can generate simpler code:
12601     //
12602     //   %a = ...
12603     //   %b = and i32 %a, 2
12604     //   %c = srl i32 %b, 1
12605     //   brcond i32 %c ...
12606     //
12607     // into
12608     //
12609     //   %a = ...
12610     //   %b = and i32 %a, 2
12611     //   %c = setcc eq %b, 0
12612     //   brcond %c ...
12613     //
12614     // This applies only when the AND constant value has one bit set and the
12615     // SRL constant is equal to the log2 of the AND constant. The back-end is
12616     // smart enough to convert the result into a TEST/JMP sequence.
12617     SDValue Op0 = N.getOperand(0);
12618     SDValue Op1 = N.getOperand(1);
12619 
12620     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12621       SDValue AndOp1 = Op0.getOperand(1);
12622 
12623       if (AndOp1.getOpcode() == ISD::Constant) {
12624         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12625 
12626         if (AndConst.isPowerOf2() &&
12627             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12628           SDLoc DL(N);
12629           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12630                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12631                               ISD::SETNE);
12632         }
12633       }
12634     }
12635   }
12636 
12637   // Transform br(xor(x, y)) -> br(x != y)
12638   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12639   if (N.getOpcode() == ISD::XOR) {
12640     // Because we may call this on a speculatively constructed
12641     // SimplifiedSetCC Node, we need to simplify this node first.
12642     // Ideally this should be folded into SimplifySetCC and not
12643     // here. For now, grab a handle to N so we don't lose it from
12644     // replacements interal to the visit.
12645     HandleSDNode XORHandle(N);
12646     while (N.getOpcode() == ISD::XOR) {
12647       SDValue Tmp = visitXOR(N.getNode());
12648       // No simplification done.
12649       if (!Tmp.getNode())
12650         break;
12651       // Returning N is form in-visit replacement that may invalidated
12652       // N. Grab value from Handle.
12653       if (Tmp.getNode() == N.getNode())
12654         N = XORHandle.getValue();
12655       else // Node simplified. Try simplifying again.
12656         N = Tmp;
12657     }
12658 
12659     if (N.getOpcode() != ISD::XOR)
12660       return N;
12661 
12662     SDNode *TheXor = N.getNode();
12663 
12664     SDValue Op0 = TheXor->getOperand(0);
12665     SDValue Op1 = TheXor->getOperand(1);
12666 
12667     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12668       bool Equal = false;
12669       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12670           Op0.getOpcode() == ISD::XOR) {
12671         TheXor = Op0.getNode();
12672         Equal = true;
12673       }
12674 
12675       EVT SetCCVT = N.getValueType();
12676       if (LegalTypes)
12677         SetCCVT = getSetCCResultType(SetCCVT);
12678       // Replace the uses of XOR with SETCC
12679       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12680                           Equal ? ISD::SETEQ : ISD::SETNE);
12681     }
12682   }
12683 
12684   return SDValue();
12685 }
12686 
12687 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12688 //
12689 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12690   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12691   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12692 
12693   // If N is a constant we could fold this into a fallthrough or unconditional
12694   // branch. However that doesn't happen very often in normal code, because
12695   // Instcombine/SimplifyCFG should have handled the available opportunities.
12696   // If we did this folding here, it would be necessary to update the
12697   // MachineBasicBlock CFG, which is awkward.
12698 
12699   // Use SimplifySetCC to simplify SETCC's.
12700   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12701                                CondLHS, CondRHS, CC->get(), SDLoc(N),
12702                                false);
12703   if (Simp.getNode()) AddToWorklist(Simp.getNode());
12704 
12705   // fold to a simpler setcc
12706   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12707     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12708                        N->getOperand(0), Simp.getOperand(2),
12709                        Simp.getOperand(0), Simp.getOperand(1),
12710                        N->getOperand(4));
12711 
12712   return SDValue();
12713 }
12714 
12715 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12716 /// and that N may be folded in the load / store addressing mode.
12717 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12718                                     SelectionDAG &DAG,
12719                                     const TargetLowering &TLI) {
12720   EVT VT;
12721   unsigned AS;
12722 
12723   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
12724     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12725       return false;
12726     VT = LD->getMemoryVT();
12727     AS = LD->getAddressSpace();
12728   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
12729     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12730       return false;
12731     VT = ST->getMemoryVT();
12732     AS = ST->getAddressSpace();
12733   } else
12734     return false;
12735 
12736   TargetLowering::AddrMode AM;
12737   if (N->getOpcode() == ISD::ADD) {
12738     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12739     if (Offset)
12740       // [reg +/- imm]
12741       AM.BaseOffs = Offset->getSExtValue();
12742     else
12743       // [reg +/- reg]
12744       AM.Scale = 1;
12745   } else if (N->getOpcode() == ISD::SUB) {
12746     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12747     if (Offset)
12748       // [reg +/- imm]
12749       AM.BaseOffs = -Offset->getSExtValue();
12750     else
12751       // [reg +/- reg]
12752       AM.Scale = 1;
12753   } else
12754     return false;
12755 
12756   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12757                                    VT.getTypeForEVT(*DAG.getContext()), AS);
12758 }
12759 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// \param N the candidate node; anything that is not an unindexed load or
///          store is rejected.
/// \returns true if N was replaced by an indexed load/store (N and its old
///          address node are then deleted), false if the DAG was left
///          untouched.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level has reached AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    // The target must support at least one pre-indexed form for this memory
    // type.
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  // While Swapped is set, BasePtr holds the non-constant part and Offset the
  // constant; the swap is undone before the indexed node is built.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  // A base that is a RegisterSDNode is rejected here as well.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();

    // Would require a copy.
    if (Val == BasePtr)
      return false;

    // Would create a cycle.
    if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  // They are shared by every hasPredecessorHelper query below, all of which
  // search from N.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // OtherUses collects those add/sub users of BasePtr. If any user that is
  // not a predecessor of N fails a requirement, the list is emptied and the
  // scan stops, so none of them get rewritten.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Users that are predecessors of N are left alone.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The user is a binary add/sub; Op1 is its operand other than this use
      // of BasePtr (operand index 0 <-> 1).
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the BasePtr/Offset order the target returned.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  // All checks passed: build the indexed node.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
             Result.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  // Result numbering as used below: for a load, results 0 and 2 take over the
  // original load's results 0 and 1, and result 1 later replaces Ptr. For a
  // store, result 1 takes over the original store's result 0, and result 0
  // later replaces Ptr.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Swap again so that, as during the OtherUses scan, BasePtr is the
  // non-constant part and Offset the constant.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    // OffsetIdx is the operand index of the constant in this user; the other
    // operand is BasePtr.
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // A SUB negates whichever operand is at index 1; PRE_DEC negates the
    // offset, or the pointer when the target returned the parts swapped.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    // The sign of (y0 * y1) selects whether t1 is added or subtracted.
    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());
  AddToWorklist(Result.getNode());

  return true;
}
12991 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its uses are
/// redirected to the new load/store.
///
/// \param N the candidate node; anything that is not an unindexed load or
///          store is rejected.
/// \returns true if N and one add/sub user of its base pointer were replaced
///          by an indexed load/store (both old nodes are then deleted), false
///          if the DAG was left untouched.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Only attempted once the combiner level has reached AfterLegalizeDAG.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    // The target must support at least one post-indexed form for this memory
    // type.
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // With N as the pointer's only user there is no add/sub to fold in.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Consider each add/sub user of the pointer as the candidate to fold.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Frame-index and register bases are not turned into indexed accesses.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          // RealUse becomes true as soon as one user of this add/sub cannot
          // fold it into its own addressing mode.
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      SmallPtrSet<const SDNode *, 32> Visited;
      SmallVector<const SDNode *, 8> Worklist;
      // Ptr is predecessor to both N and Op.
      Visited.insert(Ptr.getNode());
      Worklist.push_back(N);
      Worklist.push_back(Op);
      if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
          !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
                   dbgs() << '\n');
        WorklistRemover DeadNodes(*this);
        // Result numbering as used below: for a load, results 0 and 2 take
        // over the original load's results 0 and 1, and result 1 replaces Op.
        // For a store, result 1 takes over the original store's result 0, and
        // result 0 replaces Op.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Op with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
13116 
13117 /// Return the base-pointer arithmetic from an indexed \p LD.
13118 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13119   ISD::MemIndexedMode AM = LD->getAddressingMode();
13120   assert(AM != ISD::UNINDEXED);
13121   SDValue BP = LD->getOperand(1);
13122   SDValue Inc = LD->getOperand(2);
13123 
13124   // Some backends use TargetConstants for load offsets, but don't expect
13125   // TargetConstants in general ADD nodes. We can convert these constants into
13126   // regular Constants (if the constant is not opaque).
13127   assert((Inc.getOpcode() != ISD::TargetConstant ||
13128           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13129          "Cannot split out indexing using opaque target constants");
13130   if (Inc.getOpcode() == ISD::TargetConstant) {
13131     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13132     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13133                           ConstInc->getValueType(0));
13134   }
13135 
13136   unsigned Opc =
13137       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13138   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13139 }
13140 
13141 static inline int numVectorEltsOrZero(EVT T) {
13142   return T.isVector() ? T.getVectorNumElements() : 0;
13143 }
13144 
13145 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13146   Val = ST->getValue();
13147   EVT STType = Val.getValueType();
13148   EVT STMemType = ST->getMemoryVT();
13149   if (STType == STMemType)
13150     return true;
13151   if (isTypeLegal(STMemType))
13152     return false; // fail.
13153   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13154       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13155     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13156     return true;
13157   }
13158   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13159       STType.isInteger() && STMemType.isInteger()) {
13160     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13161     return true;
13162   }
13163   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13164     Val = DAG.getBitcast(STMemType, Val);
13165     return true;
13166   }
13167   return false; // fail.
13168 }
13169 
13170 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13171   EVT LDMemType = LD->getMemoryVT();
13172   EVT LDType = LD->getValueType(0);
13173   assert(Val.getValueType() == LDMemType &&
13174          "Attempting to extend value of non-matching type");
13175   if (LDType == LDMemType)
13176     return true;
13177   if (LDMemType.isInteger() && LDType.isInteger()) {
13178     switch (LD->getExtensionType()) {
13179     case ISD::NON_EXTLOAD:
13180       Val = DAG.getBitcast(LDType, Val);
13181       return true;
13182     case ISD::EXTLOAD:
13183       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13184       return true;
13185     case ISD::SEXTLOAD:
13186       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13187       return true;
13188     case ISD::ZEXTLOAD:
13189       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13190       return true;
13191     }
13192   }
13193   return false;
13194 }
13195 
13196 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13197   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13198     return SDValue();
13199   SDValue Chain = LD->getOperand(0);
13200   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13201   if (!ST || ST->isVolatile())
13202     return SDValue();
13203 
13204   EVT LDType = LD->getValueType(0);
13205   EVT LDMemType = LD->getMemoryVT();
13206   EVT STMemType = ST->getMemoryVT();
13207   EVT STType = ST->getValue().getValueType();
13208 
13209   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13210   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13211   int64_t Offset;
13212   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13213     return SDValue();
13214 
13215   // Normalize for Endianness. After this Offset=0 will denote that the least
13216   // significant bit in the loaded value maps to the least significant bit in
13217   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
13218   // n:th least significant byte of the stored value.
13219   if (DAG.getDataLayout().isBigEndian())
13220     Offset = (STMemType.getStoreSizeInBits() -
13221               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13222 
13223   // Check that the stored value cover all bits that are loaded.
13224   bool STCoversLD =
13225       (Offset >= 0) &&
13226       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13227 
13228   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13229     if (LD->isIndexed()) {
13230       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13231                     LD->getAddressingMode() == ISD::POST_DEC);
13232       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13233       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13234                              LD->getOperand(1), LD->getOperand(2));
13235       SDValue Ops[] = {Val, Idx, Chain};
13236       return CombineTo(LD, Ops, 3);
13237     }
13238     return CombineTo(LD, Val, Chain);
13239   };
13240 
13241   if (!STCoversLD)
13242     return SDValue();
13243 
13244   // Memory as copy space (potentially masked).
13245   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13246     // Simple case: Direct non-truncating forwarding
13247     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13248       return ReplaceLd(LD, ST->getValue(), Chain);
13249     // Can we model the truncate and extension with an and mask?
13250     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13251         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13252       // Mask to size of LDMemType
13253       auto Mask =
13254           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13255                                                STMemType.getSizeInBits()),
13256                           SDLoc(ST), STType);
13257       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13258       return ReplaceLd(LD, Val, Chain);
13259     }
13260   }
13261 
13262   // TODO: Deal with nonzero offset.
13263   if (LD->getBasePtr().isUndef() || Offset != 0)
13264     return SDValue();
13265   // Model necessary truncations / extenstions.
13266   SDValue Val;
13267   // Truncate Value To Stored Memory Size.
13268   do {
13269     if (!getTruncatedStoreValue(ST, Val))
13270       continue;
13271     if (!isTypeLegal(LDMemType))
13272       continue;
13273     if (STMemType != LDMemType) {
13274       // TODO: Support vectors? This requires extract_subvector/bitcast.
13275       if (!STMemType.isVector() && !LDMemType.isVector() &&
13276           STMemType.isInteger() && LDMemType.isInteger())
13277         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13278       else
13279         continue;
13280     }
13281     if (!extendLoadedValueToExtension(LD, Val))
13282       continue;
13283     return ReplaceLd(LD, Val, Chain);
13284   } while (false);
13285 
13286   // On failure, cleanup dead nodes we may have created.
13287   if (Val->use_empty())
13288     deleteAndRecombine(Val.getNode());
13289   return SDValue();
13290 }
13291 
/// Combine the load node \p N. The following transformations are tried in
/// order:
///  1. Delete a non-volatile load whose loaded value is unused. For indexed
///     loads the updated-pointer result is either rebuilt as separate
///     arithmetic or, if unused as well, replaced by undef.
///  2. Forward the value of a directly preceding store to the load.
///  3. Refine the alignment of an unindexed load from its pointer.
///  4. Re-attach an unindexed load to a better chain found by
///     FindBetterChain.
///  5. Turn the load into a pre- or post-indexed load.
///  6. Slice the load into smaller loads.
///
/// \returns a non-empty SDValue when a combine was performed (SDValue(N, 0)
/// is used when the replacement was done in place), an empty SDValue when
/// nothing changed.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    // Result #1 is the chain for unindexed loads; indexed loads have the
    // updated pointer there and the chain as result #2.
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
                   dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
                   dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
        AddUsersToWorklist(Chain.getNode());
        // N may have become live again (see the example above), so only
        // delete it when it really has no users left.
        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load can go away if its value is unused and the updated pointer
      // is either unused too or may be recomputed by separate arithmetic.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
                   dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
                   dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  if (auto V = ForwardStoreValueToDirectLoad(LD))
    return V;

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
        SDValue NewLoad = DAG.getExtLoad(
            LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
            LD->getPointerInfo(), LD->getMemoryVT(), Align,
            LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment
        assert(NewLoad.getNode() == N);
        (void)NewLoad;
      }
    }
  }

  if (LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(LD, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Replace uses with load result and token factor
      return CombineTo(N, ReplLoad.getValue(0), Token);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
13415 
13416 namespace {
13417 
13418 /// Helper structure used to slice a load in smaller loads.
13419 /// Basically a slice is obtained from the following sequence:
13420 /// Origin = load Ty1, Base
13421 /// Shift = srl Ty1 Origin, CstTy Amount
13422 /// Inst = trunc Shift to Ty2
13423 ///
13424 /// Then, it will be rewritten into:
13425 /// Slice = load SliceTy, Base + SliceOffset
13426 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13427 ///
13428 /// SliceTy is deduced from the number of bits that are actually used to
13429 /// build Inst.
13430 struct LoadedSlice {
13431   /// Helper structure used to compute the cost of a slice.
13432   struct Cost {
13433     /// Are we optimizing for code size.
13434     bool ForCodeSize;
13435 
13436     /// Various cost.
13437     unsigned Loads = 0;
13438     unsigned Truncates = 0;
13439     unsigned CrossRegisterBanksCopies = 0;
13440     unsigned ZExts = 0;
13441     unsigned Shift = 0;
13442 
13443     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13444 
13445     /// Get the cost of one isolated slice.
13446     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13447         : ForCodeSize(ForCodeSize), Loads(1) {
13448       EVT TruncType = LS.Inst->getValueType(0);
13449       EVT LoadedType = LS.getLoadedType();
13450       if (TruncType != LoadedType &&
13451           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13452         ZExts = 1;
13453     }
13454 
13455     /// Account for slicing gain in the current cost.
13456     /// Slicing provide a few gains like removing a shift or a
13457     /// truncate. This method allows to grow the cost of the original
13458     /// load with the gain from this slice.
13459     void addSliceGain(const LoadedSlice &LS) {
13460       // Each slice saves a truncate.
13461       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13462       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13463                               LS.Inst->getValueType(0)))
13464         ++Truncates;
13465       // If there is a shift amount, this slice gets rid of it.
13466       if (LS.Shift)
13467         ++Shift;
13468       // If this slice can merge a cross register bank copy, account for it.
13469       if (LS.canMergeExpensiveCrossRegisterBankCopy())
13470         ++CrossRegisterBanksCopies;
13471     }
13472 
13473     Cost &operator+=(const Cost &RHS) {
13474       Loads += RHS.Loads;
13475       Truncates += RHS.Truncates;
13476       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13477       ZExts += RHS.ZExts;
13478       Shift += RHS.Shift;
13479       return *this;
13480     }
13481 
13482     bool operator==(const Cost &RHS) const {
13483       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13484              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13485              ZExts == RHS.ZExts && Shift == RHS.Shift;
13486     }
13487 
13488     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13489 
13490     bool operator<(const Cost &RHS) const {
13491       // Assume cross register banks copies are as expensive as loads.
13492       // FIXME: Do we want some more target hooks?
13493       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13494       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13495       // Unless we are optimizing for code size, consider the
13496       // expensive operation first.
13497       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13498         return ExpensiveOpsLHS < ExpensiveOpsRHS;
13499       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13500              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13501     }
13502 
13503     bool operator>(const Cost &RHS) const { return RHS < *this; }
13504 
13505     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13506 
13507     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13508   };
13509 
13510   // The last instruction that represent the slice. This should be a
13511   // truncate instruction.
13512   SDNode *Inst;
13513 
13514   // The original load instruction.
13515   LoadSDNode *Origin;
13516 
13517   // The right shift amount in bits from the original load.
13518   unsigned Shift;
13519 
13520   // The DAG from which Origin came from.
13521   // This is used to get some contextual information about legal types, etc.
13522   SelectionDAG *DAG;
13523 
13524   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13525               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13526       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13527 
13528   /// Get the bits used in a chunk of bits \p BitWidth large.
13529   /// \return Result is \p BitWidth and has used bits set to 1 and
13530   ///         not used bits set to 0.
13531   APInt getUsedBits() const {
13532     // Reproduce the trunc(lshr) sequence:
13533     // - Start from the truncated value.
13534     // - Zero extend to the desired bit width.
13535     // - Shift left.
13536     assert(Origin && "No original load to compare against.");
13537     unsigned BitWidth = Origin->getValueSizeInBits(0);
13538     assert(Inst && "This slice is not bound to an instruction");
13539     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13540            "Extracted slice is bigger than the whole type!");
13541     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13542     UsedBits.setAllBits();
13543     UsedBits = UsedBits.zext(BitWidth);
13544     UsedBits <<= Shift;
13545     return UsedBits;
13546   }
13547 
13548   /// Get the size of the slice to be loaded in bytes.
13549   unsigned getLoadedSize() const {
13550     unsigned SliceSize = getUsedBits().countPopulation();
13551     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13552     return SliceSize / 8;
13553   }
13554 
13555   /// Get the type that will be loaded for this slice.
13556   /// Note: This may not be the final type for the slice.
13557   EVT getLoadedType() const {
13558     assert(DAG && "Missing context");
13559     LLVMContext &Ctxt = *DAG->getContext();
13560     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13561   }
13562 
13563   /// Get the alignment of the load used for this slice.
13564   unsigned getAlignment() const {
13565     unsigned Alignment = Origin->getAlignment();
13566     unsigned Offset = getOffsetFromBase();
13567     if (Offset != 0)
13568       Alignment = MinAlign(Alignment, Alignment + Offset);
13569     return Alignment;
13570   }
13571 
13572   /// Check if this slice can be rewritten with legal operations.
13573   bool isLegal() const {
13574     // An invalid slice is not legal.
13575     if (!Origin || !Inst || !DAG)
13576       return false;
13577 
13578     // Offsets are for indexed load only, we do not handle that.
13579     if (!Origin->getOffset().isUndef())
13580       return false;
13581 
13582     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13583 
13584     // Check that the type is legal.
13585     EVT SliceType = getLoadedType();
13586     if (!TLI.isTypeLegal(SliceType))
13587       return false;
13588 
13589     // Check that the load is legal for this type.
13590     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13591       return false;
13592 
13593     // Check that the offset can be computed.
13594     // 1. Check its type.
13595     EVT PtrType = Origin->getBasePtr().getValueType();
13596     if (PtrType == MVT::Untyped || PtrType.isExtended())
13597       return false;
13598 
13599     // 2. Check that it fits in the immediate.
13600     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13601       return false;
13602 
13603     // 3. Check that the computation is legal.
13604     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13605       return false;
13606 
13607     // Check that the zext is legal if it needs one.
13608     EVT TruncateType = Inst->getValueType(0);
13609     if (TruncateType != SliceType &&
13610         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13611       return false;
13612 
13613     return true;
13614   }
13615 
13616   /// Get the offset in bytes of this slice in the original chunk of
13617   /// bits.
13618   /// \pre DAG != nullptr.
13619   uint64_t getOffsetFromBase() const {
13620     assert(DAG && "Missing context.");
13621     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13622     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13623     uint64_t Offset = Shift / 8;
13624     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13625     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13626            "The size of the original loaded type is not a multiple of a"
13627            " byte.");
13628     // If Offset is bigger than TySizeInBytes, it means we are loading all
13629     // zeros. This should have been optimized before in the process.
13630     assert(TySizeInBytes > Offset &&
13631            "Invalid shift amount for given loaded size");
13632     if (IsBigEndian)
13633       Offset = TySizeInBytes - Offset - getLoadedSize();
13634     return Offset;
13635   }
13636 
13637   /// Generate the sequence of instructions to load the slice
13638   /// represented by this object and redirect the uses of this slice to
13639   /// this new sequence of instructions.
13640   /// \pre this->Inst && this->Origin are valid Instructions and this
13641   /// object passed the legal check: LoadedSlice::isLegal returned true.
13642   /// \return The last instruction of the sequence used to load the slice.
13643   SDValue loadSlice() const {
13644     assert(Inst && Origin && "Unable to replace a non-existing slice.");
13645     const SDValue &OldBaseAddr = Origin->getBasePtr();
13646     SDValue BaseAddr = OldBaseAddr;
13647     // Get the offset in that chunk of bytes w.r.t. the endianness.
13648     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13649     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13650     if (Offset) {
13651       // BaseAddr = BaseAddr + Offset.
13652       EVT ArithType = BaseAddr.getValueType();
13653       SDLoc DL(Origin);
13654       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13655                               DAG->getConstant(Offset, DL, ArithType));
13656     }
13657 
13658     // Create the type of the loaded slice according to its size.
13659     EVT SliceType = getLoadedType();
13660 
13661     // Create the load for the slice.
13662     SDValue LastInst =
13663         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13664                      Origin->getPointerInfo().getWithOffset(Offset),
13665                      getAlignment(), Origin->getMemOperand()->getFlags());
13666     // If the final type is not the same as the loaded type, this means that
13667     // we have to pad with zero. Create a zero extend for that.
13668     EVT FinalType = Inst->getValueType(0);
13669     if (SliceType != FinalType)
13670       LastInst =
13671           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13672     return LastInst;
13673   }
13674 
13675   /// Check if this slice can be merged with an expensive cross register
13676   /// bank copy. E.g.,
13677   /// i = load i32
13678   /// f = bitcast i32 i to float
13679   bool canMergeExpensiveCrossRegisterBankCopy() const {
13680     if (!Inst || !Inst->hasOneUse())
13681       return false;
13682     SDNode *Use = *Inst->use_begin();
13683     if (Use->getOpcode() != ISD::BITCAST)
13684       return false;
13685     assert(DAG && "Missing context");
13686     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13687     EVT ResVT = Use->getValueType(0);
13688     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13689     const TargetRegisterClass *ArgRC =
13690         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13691     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13692       return false;
13693 
13694     // At this point, we know that we perform a cross-register-bank copy.
13695     // Check if it is expensive.
13696     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13697     // Assume bitcasts are cheap, unless both register classes do not
13698     // explicitly share a common sub class.
13699     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13700       return false;
13701 
13702     // Check if it will be merged with the load.
13703     // 1. Check the alignment constraint.
13704     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13705         ResVT.getTypeForEVT(*DAG->getContext()));
13706 
13707     if (RequiredAlignment > getAlignment())
13708       return false;
13709 
13710     // 2. Check that the load is a legal operation for that type.
13711     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13712       return false;
13713 
13714     // 3. Check that we do not have a zext in the way.
13715     if (Inst->getValueType(0) != getLoadedType())
13716       return false;
13717 
13718     return true;
13719   }
13720 };
13721 
13722 } // end anonymous namespace
13723 
13724 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13725 /// \p UsedBits looks like 0..0 1..1 0..0.
13726 static bool areUsedBitsDense(const APInt &UsedBits) {
13727   // If all the bits are one, this is dense!
13728   if (UsedBits.isAllOnesValue())
13729     return true;
13730 
13731   // Get rid of the unused bits on the right.
13732   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13733   // Get rid of the unused bits on the left.
13734   if (NarrowedUsedBits.countLeadingZeros())
13735     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13736   // Check that the chunk of bits is completely used.
13737   return NarrowedUsedBits.isAllOnesValue();
13738 }
13739 
13740 /// Check whether or not \p First and \p Second are next to each other
13741 /// in memory. This means that there is no hole between the bits loaded
13742 /// by \p First and the bits loaded by \p Second.
13743 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13744                                      const LoadedSlice &Second) {
13745   assert(First.Origin == Second.Origin && First.Origin &&
13746          "Unable to match different memory origins.");
13747   APInt UsedBits = First.getUsedBits();
13748   assert((UsedBits & Second.getUsedBits()) == 0 &&
13749          "Slices are not supposed to overlap.");
13750   UsedBits |= Second.getUsedBits();
13751   return areUsedBitsDense(UsedBits);
13752 }
13753 
13754 /// Adjust the \p GlobalLSCost according to the target
13755 /// paring capabilities and the layout of the slices.
13756 /// \pre \p GlobalLSCost should account for at least as many loads as
13757 /// there is in the slices in \p LoadedSlices.
13758 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13759                                  LoadedSlice::Cost &GlobalLSCost) {
13760   unsigned NumberOfSlices = LoadedSlices.size();
13761   // If there is less than 2 elements, no pairing is possible.
13762   if (NumberOfSlices < 2)
13763     return;
13764 
13765   // Sort the slices so that elements that are likely to be next to each
13766   // other in memory are next to each other in the list.
13767   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
13768     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
13769     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
13770   });
13771   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
13772   // First (resp. Second) is the first (resp. Second) potentially candidate
13773   // to be placed in a paired load.
13774   const LoadedSlice *First = nullptr;
13775   const LoadedSlice *Second = nullptr;
13776   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
13777                 // Set the beginning of the pair.
13778                                                            First = Second) {
13779     Second = &LoadedSlices[CurrSlice];
13780 
13781     // If First is NULL, it means we start a new pair.
13782     // Get to the next slice.
13783     if (!First)
13784       continue;
13785 
13786     EVT LoadedType = First->getLoadedType();
13787 
13788     // If the types of the slices are different, we cannot pair them.
13789     if (LoadedType != Second->getLoadedType())
13790       continue;
13791 
13792     // Check if the target supplies paired loads for this type.
13793     unsigned RequiredAlignment = 0;
13794     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
13795       // move to the next pair, this type is hopeless.
13796       Second = nullptr;
13797       continue;
13798     }
13799     // Check if we meet the alignment requirement.
13800     if (RequiredAlignment > First->getAlignment())
13801       continue;
13802 
13803     // Check that both loads are next to each other in memory.
13804     if (!areSlicesNextToEachOther(*First, *Second))
13805       continue;
13806 
13807     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
13808     --GlobalLSCost.Loads;
13809     // Move to the next pair.
13810     Second = nullptr;
13811   }
13812 }
13813 
13814 /// Check the profitability of all involved LoadedSlice.
13815 /// Currently, it is considered profitable if there is exactly two
13816 /// involved slices (1) which are (2) next to each other in memory, and
13817 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
13818 ///
13819 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
13820 /// the elements themselves.
13821 ///
13822 /// FIXME: When the cost model will be mature enough, we can relax
13823 /// constraints (1) and (2).
13824 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13825                                 const APInt &UsedBits, bool ForCodeSize) {
13826   unsigned NumberOfSlices = LoadedSlices.size();
13827   if (StressLoadSlicing)
13828     return NumberOfSlices > 1;
13829 
13830   // Check (1).
13831   if (NumberOfSlices != 2)
13832     return false;
13833 
13834   // Check (2).
13835   if (!areUsedBitsDense(UsedBits))
13836     return false;
13837 
13838   // Check (3).
13839   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
13840   // The original code has one big load.
13841   OrigCost.Loads = 1;
13842   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
13843     const LoadedSlice &LS = LoadedSlices[CurrSlice];
13844     // Accumulate the cost of all the slices.
13845     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
13846     GlobalSlicingCost += SliceCost;
13847 
13848     // Account as cost in the original configuration the gain obtained
13849     // with the current slices.
13850     OrigCost.addSliceGain(LS);
13851   }
13852 
13853   // If the target supports paired load, adjust the cost accordingly.
13854   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
13855   return OrigCost > GlobalSlicingCost;
13856 }
13857 
13858 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
13859 /// operations, split it in the various pieces being extracted.
13860 ///
13861 /// This sort of thing is introduced by SROA.
13862 /// This slicing takes care not to insert overlapping loads.
13863 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
13864 bool DAGCombiner::SliceUpLoad(SDNode *N) {
13865   if (Level < AfterLegalizeDAG)
13866     return false;
13867 
13868   LoadSDNode *LD = cast<LoadSDNode>(N);
13869   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
13870       !LD->getValueType(0).isInteger())
13871     return false;
13872 
13873   // Keep track of already used bits to detect overlapping values.
13874   // In that case, we will just abort the transformation.
13875   APInt UsedBits(LD->getValueSizeInBits(0), 0);
13876 
13877   SmallVector<LoadedSlice, 4> LoadedSlices;
13878 
13879   // Check if this load is used as several smaller chunks of bits.
13880   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
13881   // of computation for each trunc.
13882   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
13883        UI != UIEnd; ++UI) {
13884     // Skip the uses of the chain.
13885     if (UI.getUse().getResNo() != 0)
13886       continue;
13887 
13888     SDNode *User = *UI;
13889     unsigned Shift = 0;
13890 
13891     // Check if this is a trunc(lshr).
13892     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
13893         isa<ConstantSDNode>(User->getOperand(1))) {
13894       Shift = User->getConstantOperandVal(1);
13895       User = *User->use_begin();
13896     }
13897 
13898     // At this point, User is a Truncate, iff we encountered, trunc or
13899     // trunc(lshr).
13900     if (User->getOpcode() != ISD::TRUNCATE)
13901       return false;
13902 
13903     // The width of the type must be a power of 2 and greater than 8-bits.
13904     // Otherwise the load cannot be represented in LLVM IR.
13905     // Moreover, if we shifted with a non-8-bits multiple, the slice
13906     // will be across several bytes. We do not support that.
13907     unsigned Width = User->getValueSizeInBits(0);
13908     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
13909       return false;
13910 
13911     // Build the slice for this chain of computations.
13912     LoadedSlice LS(User, LD, Shift, &DAG);
13913     APInt CurrentUsedBits = LS.getUsedBits();
13914 
13915     // Check if this slice overlaps with another.
13916     if ((CurrentUsedBits & UsedBits) != 0)
13917       return false;
13918     // Update the bits used globally.
13919     UsedBits |= CurrentUsedBits;
13920 
13921     // Check if the new slice would be legal.
13922     if (!LS.isLegal())
13923       return false;
13924 
13925     // Record the slice.
13926     LoadedSlices.push_back(LS);
13927   }
13928 
13929   // Abort slicing if it does not seem to be profitable.
13930   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
13931     return false;
13932 
13933   ++SlicedLoads;
13934 
13935   // Rewrite each chain to use an independent load.
13936   // By construction, each chain can be represented by a unique load.
13937 
13938   // Prepare the argument for the new token factor for all the slices.
13939   SmallVector<SDValue, 8> ArgChains;
13940   for (SmallVectorImpl<LoadedSlice>::const_iterator
13941            LSIt = LoadedSlices.begin(),
13942            LSItEnd = LoadedSlices.end();
13943        LSIt != LSItEnd; ++LSIt) {
13944     SDValue SliceInst = LSIt->loadSlice();
13945     CombineTo(LSIt->Inst, SliceInst, true);
13946     if (SliceInst.getOpcode() != ISD::LOAD)
13947       SliceInst = SliceInst.getOperand(0);
13948     assert(SliceInst->getOpcode() == ISD::LOAD &&
13949            "It takes more than a zext to get to the loaded slice!!");
13950     ArgChains.push_back(SliceInst.getValue(1));
13951   }
13952 
13953   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
13954                               ArgChains);
13955   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13956   AddToWorklist(Chain.getNode());
13957   return true;
13958 }
13959 
13960 /// Check to see if V is (and load (ptr), imm), where the load is having
13961 /// specific bytes cleared out.  If so, return the byte size being masked out
13962 /// and the shift amount.
13963 static std::pair<unsigned, unsigned>
13964 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
13965   std::pair<unsigned, unsigned> Result(0, 0);
13966 
13967   // Check for the structure we're looking for.
13968   if (V->getOpcode() != ISD::AND ||
13969       !isa<ConstantSDNode>(V->getOperand(1)) ||
13970       !ISD::isNormalLoad(V->getOperand(0).getNode()))
13971     return Result;
13972 
13973   // Check the chain and pointer.
13974   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
13975   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
13976 
13977   // This only handles simple types.
13978   if (V.getValueType() != MVT::i16 &&
13979       V.getValueType() != MVT::i32 &&
13980       V.getValueType() != MVT::i64)
13981     return Result;
13982 
13983   // Check the constant mask.  Invert it so that the bits being masked out are
13984   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
13985   // follow the sign bit for uniformity.
13986   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
13987   unsigned NotMaskLZ = countLeadingZeros(NotMask);
13988   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
13989   unsigned NotMaskTZ = countTrailingZeros(NotMask);
13990   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
13991   if (NotMaskLZ == 64) return Result;  // All zero mask.
13992 
13993   // See if we have a continuous run of bits.  If so, we have 0*1+0*
13994   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
13995     return Result;
13996 
13997   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
13998   if (V.getValueType() != MVT::i64 && NotMaskLZ)
13999     NotMaskLZ -= 64-V.getValueSizeInBits();
14000 
14001   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14002   switch (MaskedBytes) {
14003   case 1:
14004   case 2:
14005   case 4: break;
14006   default: return Result; // All one mask, or 5-byte mask.
14007   }
14008 
14009   // Verify that the first bit starts at a multiple of mask so that the access
14010   // is aligned the same as the access width.
14011   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14012 
14013   // For narrowing to be valid, it must be the case that the load the
14014   // immediately preceding memory operation before the store.
14015   if (LD == Chain.getNode())
14016     ; // ok.
14017   else if (Chain->getOpcode() == ISD::TokenFactor &&
14018            SDValue(LD, 1).hasOneUse()) {
14019     // LD has only 1 chain use so they are no indirect dependencies.
14020     bool isOk = false;
14021     for (const SDValue &ChainOp : Chain->op_values())
14022       if (ChainOp.getNode() == LD) {
14023         isOk = true;
14024         break;
14025       }
14026     if (!isOk)
14027       return Result;
14028   } else
14029     return Result; // Fail.
14030 
14031   Result.first = MaskedBytes;
14032   Result.second = NotMaskTZ/8;
14033   return Result;
14034 }
14035 
14036 /// Check to see if IVal is something that provides a value as specified by
14037 /// MaskInfo. If so, replace the specified store with a narrower store of
14038 /// truncated IVal.
14039 static SDNode *
14040 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14041                                 SDValue IVal, StoreSDNode *St,
14042                                 DAGCombiner *DC) {
14043   unsigned NumBytes = MaskInfo.first;
14044   unsigned ByteShift = MaskInfo.second;
14045   SelectionDAG &DAG = DC->getDAG();
14046 
14047   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14048   // that uses this.  If not, this is not a replacement.
14049   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14050                                   ByteShift*8, (ByteShift+NumBytes)*8);
14051   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14052 
14053   // Check that it is legal on the target to do this.  It is legal if the new
14054   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14055   // legalization.
14056   MVT VT = MVT::getIntegerVT(NumBytes*8);
14057   if (!DC->isTypeLegal(VT))
14058     return nullptr;
14059 
14060   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14061   // shifted by ByteShift and truncated down to NumBytes.
14062   if (ByteShift) {
14063     SDLoc DL(IVal);
14064     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14065                        DAG.getConstant(ByteShift*8, DL,
14066                                     DC->getShiftAmountTy(IVal.getValueType())));
14067   }
14068 
14069   // Figure out the offset for the store and the alignment of the access.
14070   unsigned StOffset;
14071   unsigned NewAlign = St->getAlignment();
14072 
14073   if (DAG.getDataLayout().isLittleEndian())
14074     StOffset = ByteShift;
14075   else
14076     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14077 
14078   SDValue Ptr = St->getBasePtr();
14079   if (StOffset) {
14080     SDLoc DL(IVal);
14081     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14082                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14083     NewAlign = MinAlign(NewAlign, StOffset);
14084   }
14085 
14086   // Truncate down to the new size.
14087   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14088 
14089   ++OpsNarrowed;
14090   return DAG
14091       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14092                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14093       .getNode();
14094 }
14095 
14096 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14097 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14098 /// narrowing the load and store if it would end up being a win for performance
14099 /// or code size.
14100 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14101   StoreSDNode *ST  = cast<StoreSDNode>(N);
14102   if (ST->isVolatile())
14103     return SDValue();
14104 
14105   SDValue Chain = ST->getChain();
14106   SDValue Value = ST->getValue();
14107   SDValue Ptr   = ST->getBasePtr();
14108   EVT VT = Value.getValueType();
14109 
14110   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14111     return SDValue();
14112 
14113   unsigned Opc = Value.getOpcode();
14114 
14115   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14116   // is a byte mask indicating a consecutive number of bytes, check to see if
14117   // Y is known to provide just those bytes.  If so, we try to replace the
14118   // load + replace + store sequence with a single (narrower) store, which makes
14119   // the load dead.
14120   if (Opc == ISD::OR) {
14121     std::pair<unsigned, unsigned> MaskedLoad;
14122     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14123     if (MaskedLoad.first)
14124       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14125                                                   Value.getOperand(1), ST,this))
14126         return SDValue(NewST, 0);
14127 
14128     // Or is commutative, so try swapping X and Y.
14129     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14130     if (MaskedLoad.first)
14131       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14132                                                   Value.getOperand(0), ST,this))
14133         return SDValue(NewST, 0);
14134   }
14135 
14136   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14137       Value.getOperand(1).getOpcode() != ISD::Constant)
14138     return SDValue();
14139 
14140   SDValue N0 = Value.getOperand(0);
14141   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14142       Chain == SDValue(N0.getNode(), 1)) {
14143     LoadSDNode *LD = cast<LoadSDNode>(N0);
14144     if (LD->getBasePtr() != Ptr ||
14145         LD->getPointerInfo().getAddrSpace() !=
14146         ST->getPointerInfo().getAddrSpace())
14147       return SDValue();
14148 
14149     // Find the type to narrow it the load / op / store to.
14150     SDValue N1 = Value.getOperand(1);
14151     unsigned BitWidth = N1.getValueSizeInBits();
14152     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14153     if (Opc == ISD::AND)
14154       Imm ^= APInt::getAllOnesValue(BitWidth);
14155     if (Imm == 0 || Imm.isAllOnesValue())
14156       return SDValue();
14157     unsigned ShAmt = Imm.countTrailingZeros();
14158     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14159     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14160     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14161     // The narrowing should be profitable, the load/store operation should be
14162     // legal (or custom) and the store size should be equal to the NewVT width.
14163     while (NewBW < BitWidth &&
14164            (NewVT.getStoreSizeInBits() != NewBW ||
14165             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14166             !TLI.isNarrowingProfitable(VT, NewVT))) {
14167       NewBW = NextPowerOf2(NewBW);
14168       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14169     }
14170     if (NewBW >= BitWidth)
14171       return SDValue();
14172 
14173     // If the lsb changed does not start at the type bitwidth boundary,
14174     // start at the previous one.
14175     if (ShAmt % NewBW)
14176       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14177     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14178                                    std::min(BitWidth, ShAmt + NewBW));
14179     if ((Imm & Mask) == Imm) {
14180       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14181       if (Opc == ISD::AND)
14182         NewImm ^= APInt::getAllOnesValue(NewBW);
14183       uint64_t PtrOff = ShAmt / 8;
14184       // For big endian targets, we need to adjust the offset to the pointer to
14185       // load the correct bytes.
14186       if (DAG.getDataLayout().isBigEndian())
14187         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14188 
14189       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14190       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14191       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14192         return SDValue();
14193 
14194       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14195                                    Ptr.getValueType(), Ptr,
14196                                    DAG.getConstant(PtrOff, SDLoc(LD),
14197                                                    Ptr.getValueType()));
14198       SDValue NewLD =
14199           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14200                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14201                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14202       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14203                                    DAG.getConstant(NewImm, SDLoc(Value),
14204                                                    NewVT));
14205       SDValue NewST =
14206           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14207                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14208 
14209       AddToWorklist(NewPtr.getNode());
14210       AddToWorklist(NewLD.getNode());
14211       AddToWorklist(NewVal.getNode());
14212       WorklistRemover DeadNodes(*this);
14213       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14214       ++OpsNarrowed;
14215       return NewST;
14216     }
14217   }
14218 
14219   return SDValue();
14220 }
14221 
14222 /// For a given floating point load / store pair, if the load value isn't used
14223 /// by any other operations, then consider transforming the pair to integer
14224 /// load / store operations if the target deems the transformation profitable.
14225 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14226   StoreSDNode *ST  = cast<StoreSDNode>(N);
14227   SDValue Chain = ST->getChain();
14228   SDValue Value = ST->getValue();
14229   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14230       Value.hasOneUse() &&
14231       Chain == SDValue(Value.getNode(), 1)) {
14232     LoadSDNode *LD = cast<LoadSDNode>(Value);
14233     EVT VT = LD->getMemoryVT();
14234     if (!VT.isFloatingPoint() ||
14235         VT != ST->getMemoryVT() ||
14236         LD->isNonTemporal() ||
14237         ST->isNonTemporal() ||
14238         LD->getPointerInfo().getAddrSpace() != 0 ||
14239         ST->getPointerInfo().getAddrSpace() != 0)
14240       return SDValue();
14241 
14242     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14243     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14244         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14245         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14246         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14247       return SDValue();
14248 
14249     unsigned LDAlign = LD->getAlignment();
14250     unsigned STAlign = ST->getAlignment();
14251     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14252     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14253     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14254       return SDValue();
14255 
14256     SDValue NewLD =
14257         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14258                     LD->getPointerInfo(), LDAlign);
14259 
14260     SDValue NewST =
14261         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14262                      ST->getPointerInfo(), STAlign);
14263 
14264     AddToWorklist(NewLD.getNode());
14265     AddToWorklist(NewST.getNode());
14266     WorklistRemover DeadNodes(*this);
14267     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14268     ++LdStFP2Int;
14269     return NewST;
14270   }
14271 
14272   return SDValue();
14273 }
14274 
14275 // This is a helper function for visitMUL to check the profitability
14276 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14277 // MulNode is the original multiply, AddNode is (add x, c1),
14278 // and ConstNode is c2.
14279 //
14280 // If the (add x, c1) has multiple uses, we could increase
14281 // the number of adds if we make this transformation.
14282 // It would only be worth doing this if we can remove a
14283 // multiply in the process. Check for that here.
14284 // To illustrate:
14285 //     (A + c1) * c3
14286 //     (A + c2) * c3
14287 // We're checking for cases where we have common "c3 * A" expressions.
14288 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14289                                               SDValue &AddNode,
14290                                               SDValue &ConstNode) {
14291   APInt Val;
14292 
14293   // If the add only has one use, this would be OK to do.
14294   if (AddNode.getNode()->hasOneUse())
14295     return true;
14296 
14297   // Walk all the users of the constant with which we're multiplying.
14298   for (SDNode *Use : ConstNode->uses()) {
14299     if (Use == MulNode) // This use is the one we're on right now. Skip it.
14300       continue;
14301 
14302     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14303       SDNode *OtherOp;
14304       SDNode *MulVar = AddNode.getOperand(0).getNode();
14305 
14306       // OtherOp is what we're multiplying against the constant.
14307       if (Use->getOperand(0) == ConstNode)
14308         OtherOp = Use->getOperand(1).getNode();
14309       else
14310         OtherOp = Use->getOperand(0).getNode();
14311 
14312       // Check to see if multiply is with the same operand of our "add".
14313       //
14314       //     ConstNode  = CONST
14315       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
14316       //     ...
14317       //     AddNode  = (A + c1)  <-- MulVar is A.
14318       //         = AddNode * ConstNode   <-- current visiting instruction.
14319       //
14320       // If we make this transformation, we will have a common
14321       // multiply (ConstNode * A) that we can save.
14322       if (OtherOp == MulVar)
14323         return true;
14324 
14325       // Now check to see if a future expansion will give us a common
14326       // multiply.
14327       //
14328       //     ConstNode  = CONST
14329       //     AddNode    = (A + c1)
14330       //     ...   = AddNode * ConstNode <-- current visiting instruction.
14331       //     ...
14332       //     OtherOp = (A + c2)
14333       //     Use     = OtherOp * ConstNode <-- visiting Use.
14334       //
14335       // If we make this transformation, we will have a common
14336       // multiply (CONST * A) after we also do the same transformation
14337       // to the "t2" instruction.
14338       if (OtherOp->getOpcode() == ISD::ADD &&
14339           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14340           OtherOp->getOperand(0).getNode() == MulVar)
14341         return true;
14342     }
14343   }
14344 
14345   // Didn't find a case where this would be profitable.
14346   return false;
14347 }
14348 
14349 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14350                                          unsigned NumStores) {
14351   SmallVector<SDValue, 8> Chains;
14352   SmallPtrSet<const SDNode *, 8> Visited;
14353   SDLoc StoreDL(StoreNodes[0].MemNode);
14354 
14355   for (unsigned i = 0; i < NumStores; ++i) {
14356     Visited.insert(StoreNodes[i].MemNode);
14357   }
14358 
14359   // don't include nodes that are children
14360   for (unsigned i = 0; i < NumStores; ++i) {
14361     if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
14362       Chains.push_back(StoreNodes[i].MemNode->getChain());
14363   }
14364 
14365   assert(Chains.size() > 0 && "Chain should have generated a chain");
14366   return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
14367 }
14368 
/// Merge the first \p NumStores entries of \p StoreNodes into one wider store.
///
/// The merged value is assembled in one of two ways:
///  - \p UseVector: a vector with NumStores times the element count of
///    \p MemVT, built with BUILD_VECTOR (scalar \p MemVT) or CONCAT_VECTORS
///    (vector \p MemVT) from the stored constants (\p IsConstantSrc) or from
///    the stored extracted elements/subvectors.
///  - otherwise: a single integer constant of
///    NumStores * MemVT.getStoreSizeInBits() bits; this path requires
///    \p IsConstantSrc.
/// The new store reuses the base pointer, pointer info and alignment of
/// StoreNodes[0]. With \p UseTrunc the constant is widened to the type
/// returned by TLI.getTypeToTransformTo and emitted as a truncating store.
/// Every merged store is then replaced by the new store through CombineTo.
///
/// \returns true if the stores were merged; false if there are fewer than two
/// stores or if a floating point constant would have to be truncated.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
    bool IsConstantSrc, bool UseVector, bool UseTrunc) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);

  // Size of one stored element and of the whole merged value, in bits.
  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
  unsigned SizeInBits = NumStores * ElementSizeBits;
  // Number of vector elements each individual store contributes (1 for
  // scalar stores).
  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;

  EVT StoreTy;
  if (UseVector) {
    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
  } else
    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);

  SDValue StoredVal;
  if (UseVector) {
    if (IsConstantSrc) {
      // Collect the stored constants, converting each one to MemVT if needed.
      SmallVector<SDValue, 8> BuildVector;
      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
        // If constant is of the wrong type, convert it now.
        if (MemVT != Val.getValueType()) {
          Val = peekThroughBitcasts(Val);
          // Deal with constants of wrong size.
          if (ElementSizeBits != Val.getValueSizeInBits()) {
            EVT IntMemVT =
                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
            if (isa<ConstantFPSDNode>(Val)) {
              // Not clear how to truncate FP values.
              return false;
            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
              // Resize the integer constant to the element size.
              Val = DAG.getConstant(C->getAPIntValue()
                                        .zextOrTrunc(Val.getValueSizeInBits())
                                        .zextOrTrunc(ElementSizeBits),
                                    SDLoc(C), IntMemVT);
          }
          // Make sure the correctly sized value also has the correct type.
          Val = DAG.getBitcast(MemVT, Val);
        }
        BuildVector.push_back(Val);
      }
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, BuildVector);
    } else {
      // Collect the stored (extracted) values, recasting them to MemVT where
      // possible.
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
        // type MemVT. If the underlying value is not the correct
        // type, but it is an extraction of an appropriate vector we
        // can recast Val to be of the correct type. This may require
        // converting between EXTRACT_VECTOR_ELT and
        // EXTRACT_SUBVECTOR.
        if ((MemVT != Val.getValueType()) &&
            (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
             Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
          EVT MemVTScalarTy = MemVT.getScalarType();
          // We may need to add a bitcast here to get types to line up.
          if (MemVTScalarTy != Val.getValueType().getScalarType()) {
            Val = DAG.getBitcast(MemVT, Val);
          } else {
            // Same scalar type: redo the extraction with the opcode that
            // yields MemVT directly.
            unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
                                            : ISD::EXTRACT_VECTOR_ELT;
            SDValue Vec = Val.getOperand(0);
            SDValue Idx = Val.getOperand(1);
            Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
          }
        }
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
                                               : ISD::BUILD_VECTOR,
                              DL, StoreTy, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs. Each iteration shifts the accumulated value left by one
    // element and ORs the next constant into the low bits, so on little-endian
    // targets the stores are visited last-to-first to leave StoreNodes[0] in
    // the least significant bits.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);

      SDValue Val = St->getValue();
      Val = peekThroughBitcasts(Val);
      StoreInt <<= ElementSizeBits;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF()
                        .bitcastToAPInt()
                        .zextOrTrunc(ElementSizeBits)
                        .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary give up for now.
        if (MemVT.getSizeInBits() != ElementSizeBits)
          return false;
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Materialize the merged constant that the new store will write.
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  // The merged store takes its address information from the first store and
  // is chained on the combined chains of all merged stores.
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);

  // make sure we use trunc store if it's necessary to be legal.
  SDValue NewStore;
  if (!UseTrunc) {
    NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
                            FirstInChain->getPointerInfo(),
                            FirstInChain->getAlignment());
  } else { // Must be realized as a trunc store
    // Widen the constant to the type the target transforms StoreTy to, then
    // store it truncated back to the merged type. The cast below requires
    // StoredVal to be an integer constant on this path.
    EVT LegalizedStoredValTy =
        TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
    unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
    ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
    SDValue ExtendedStoreVal =
        DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
                        LegalizedStoredValTy);
    NewStore = DAG.getTruncStore(
        NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
        FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
        FirstInChain->getAlignment(),
        FirstInChain->getMemOperand()->getFlags());
  }

  // Replace all merged stores with the new store.
  for (unsigned i = 0; i < NumStores; ++i)
    CombineTo(StoreNodes[i].MemNode, NewStore);

  AddToWorklist(NewChain.getNode());
  return true;
}
14525 
14526 void DAGCombiner::getStoreMergeCandidates(
14527     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14528     SDNode *&RootNode) {
14529   // This holds the base pointer, index, and the offset in bytes from the base
14530   // pointer.
14531   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14532   EVT MemVT = St->getMemoryVT();
14533 
14534   SDValue Val = peekThroughBitcasts(St->getValue());
14535   // We must have a base and an offset.
14536   if (!BasePtr.getBase().getNode())
14537     return;
14538 
14539   // Do not handle stores to undef base pointers.
14540   if (BasePtr.getBase().isUndef())
14541     return;
14542 
14543   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14544   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14545                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14546   bool IsLoadSrc = isa<LoadSDNode>(Val);
14547   BaseIndexOffset LBasePtr;
14548   // Match on loadbaseptr if relevant.
14549   EVT LoadVT;
14550   if (IsLoadSrc) {
14551     auto *Ld = cast<LoadSDNode>(Val);
14552     LBasePtr = BaseIndexOffset::match(Ld, DAG);
14553     LoadVT = Ld->getMemoryVT();
14554     // Load and store should be the same type.
14555     if (MemVT != LoadVT)
14556       return;
14557     // Loads must only have one use.
14558     if (!Ld->hasNUsesOfValue(1, 0))
14559       return;
14560     // The memory operands must not be volatile.
14561     if (Ld->isVolatile() || Ld->isIndexed())
14562       return;
14563   }
14564   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14565                             int64_t &Offset) -> bool {
14566     if (Other->isVolatile() || Other->isIndexed())
14567       return false;
14568     SDValue Val = peekThroughBitcasts(Other->getValue());
14569     // Allow merging constants of different types as integers.
14570     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14571                                            : Other->getMemoryVT() != MemVT;
14572     if (IsLoadSrc) {
14573       if (NoTypeMatch)
14574         return false;
14575       // The Load's Base Ptr must also match
14576       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14577         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14578         if (LoadVT != OtherLd->getMemoryVT())
14579           return false;
14580         // Loads must only have one use.
14581         if (!OtherLd->hasNUsesOfValue(1, 0))
14582           return false;
14583         // The memory operands must not be volatile.
14584         if (OtherLd->isVolatile() || OtherLd->isIndexed())
14585           return false;
14586         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14587           return false;
14588       } else
14589         return false;
14590     }
14591     if (IsConstantSrc) {
14592       if (NoTypeMatch)
14593         return false;
14594       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14595         return false;
14596     }
14597     if (IsExtractVecSrc) {
14598       // Do not merge truncated stores here.
14599       if (Other->isTruncatingStore())
14600         return false;
14601       if (!MemVT.bitsEq(Val.getValueType()))
14602         return false;
14603       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14604           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14605         return false;
14606     }
14607     Ptr = BaseIndexOffset::match(Other, DAG);
14608     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14609   };
14610 
14611   // We looking for a root node which is an ancestor to all mergable
14612   // stores. We search up through a load, to our root and then down
14613   // through all children. For instance we will find Store{1,2,3} if
14614   // St is Store1, Store2. or Store3 where the root is not a load
14615   // which always true for nonvolatile ops. TODO: Expand
14616   // the search to find all valid candidates through multiple layers of loads.
14617   //
14618   // Root
14619   // |-------|-------|
14620   // Load    Load    Store3
14621   // |       |
14622   // Store1   Store2
14623   //
14624   // FIXME: We should be able to climb and
14625   // descend TokenFactors to find candidates as well.
14626 
14627   RootNode = St->getChain().getNode();
14628 
14629   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14630     RootNode = Ldn->getChain().getNode();
14631     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14632       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14633         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14634           if (I2.getOperandNo() == 0)
14635             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14636               BaseIndexOffset Ptr;
14637               int64_t PtrDiff;
14638               if (CandidateMatch(OtherST, Ptr, PtrDiff))
14639                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14640             }
14641   } else
14642     for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
14643       if (I.getOperandNo() == 0)
14644         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14645           BaseIndexOffset Ptr;
14646           int64_t PtrDiff;
14647           if (CandidateMatch(OtherST, Ptr, PtrDiff))
14648             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14649         }
14650 }
14651 
14652 // We need to check that merging these stores does not cause a loop in
14653 // the DAG. Any store candidate may depend on another candidate
14654 // indirectly through its operand (we already consider dependencies
14655 // through the chain). Check in parallel by searching up from
14656 // non-chain operands of candidates.
14657 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14658     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14659     SDNode *RootNode) {
14660   // FIXME: We should be able to truncate a full search of
14661   // predecessors by doing a BFS and keeping tabs the originating
14662   // stores from which worklist nodes come from in a similar way to
14663   // TokenFactor simplfication.
14664 
14665   SmallPtrSet<const SDNode *, 32> Visited;
14666   SmallVector<const SDNode *, 8> Worklist;
14667 
14668   // RootNode is a predecessor to all candidates so we need not search
14669   // past it. Add RootNode (peeking through TokenFactors). Do not count
14670   // these towards size check.
14671 
14672   Worklist.push_back(RootNode);
14673   while (!Worklist.empty()) {
14674     auto N = Worklist.pop_back_val();
14675     if (!Visited.insert(N).second)
14676       continue; // Already present in Visited.
14677     if (N->getOpcode() == ISD::TokenFactor) {
14678       for (SDValue Op : N->ops())
14679         Worklist.push_back(Op.getNode());
14680     }
14681   }
14682 
14683   // Don't count pruning nodes towards max.
14684   unsigned int Max = 1024 + Visited.size();
14685   // Search Ops of store candidates.
14686   for (unsigned i = 0; i < NumStores; ++i) {
14687     SDNode *N = StoreNodes[i].MemNode;
14688     // Of the 4 Store Operands:
14689     //   * Chain (Op 0) -> We have already considered these
14690     //                    in candidate selection and can be
14691     //                    safely ignored
14692     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14693     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14694     //                       but aren't necessarily fromt the same base node, so
14695     //                       cycles possible (e.g. via indexed store).
14696     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14697     //               non-indexed stores). Not constant on all targets (e.g. ARM)
14698     //               and so can participate in a cycle.
14699     for (unsigned j = 1; j < N->getNumOperands(); ++j)
14700       Worklist.push_back(N->getOperand(j).getNode());
14701   }
14702   // Search through DAG. We can stop early if we find a store node.
14703   for (unsigned i = 0; i < NumStores; ++i)
14704     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14705                                      Max))
14706       return false;
14707   return true;
14708 }
14709 
14710 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14711   if (OptLevel == CodeGenOpt::None)
14712     return false;
14713 
14714   EVT MemVT = St->getMemoryVT();
14715   int64_t ElementSizeBytes = MemVT.getStoreSize();
14716   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14717 
14718   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14719     return false;
14720 
14721   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14722       Attribute::NoImplicitFloat);
14723 
14724   // This function cannot currently deal with non-byte-sized memory sizes.
14725   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14726     return false;
14727 
14728   if (!MemVT.isSimple())
14729     return false;
14730 
14731   // Perform an early exit check. Do not bother looking at stored values that
14732   // are not constants, loads, or extracted vector elements.
14733   SDValue StoredVal = peekThroughBitcasts(St->getValue());
14734   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14735   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14736                        isa<ConstantFPSDNode>(StoredVal);
14737   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14738                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14739 
14740   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14741     return false;
14742 
14743   SmallVector<MemOpLink, 8> StoreNodes;
14744   SDNode *RootNode;
14745   // Find potential store merge candidates by searching through chain sub-DAG
14746   getStoreMergeCandidates(St, StoreNodes, RootNode);
14747 
14748   // Check if there is anything to merge.
14749   if (StoreNodes.size() < 2)
14750     return false;
14751 
14752   // Sort the memory operands according to their distance from the
14753   // base pointer.
14754   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14755     return LHS.OffsetFromBase < RHS.OffsetFromBase;
14756   });
14757 
14758   // Store Merge attempts to merge the lowest stores. This generally
14759   // works out as if successful, as the remaining stores are checked
14760   // after the first collection of stores is merged. However, in the
14761   // case that a non-mergeable store is found first, e.g., {p[-2],
14762   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
14763   // mergeable cases. To prevent this, we prune such stores from the
14764   // front of StoreNodes here.
14765 
14766   bool RV = false;
14767   while (StoreNodes.size() > 1) {
14768     unsigned StartIdx = 0;
14769     while ((StartIdx + 1 < StoreNodes.size()) &&
14770            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
14771                StoreNodes[StartIdx + 1].OffsetFromBase)
14772       ++StartIdx;
14773 
14774     // Bail if we don't have enough candidates to merge.
14775     if (StartIdx + 1 >= StoreNodes.size())
14776       return RV;
14777 
14778     if (StartIdx)
14779       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
14780 
14781     // Scan the memory operations on the chain and find the first
14782     // non-consecutive store memory address.
14783     unsigned NumConsecutiveStores = 1;
14784     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
14785     // Check that the addresses are consecutive starting from the second
14786     // element in the list of stores.
14787     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
14788       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
14789       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
14790         break;
14791       NumConsecutiveStores = i + 1;
14792     }
14793 
14794     if (NumConsecutiveStores < 2) {
14795       StoreNodes.erase(StoreNodes.begin(),
14796                        StoreNodes.begin() + NumConsecutiveStores);
14797       continue;
14798     }
14799 
14800     // The node with the lowest store address.
14801     LLVMContext &Context = *DAG.getContext();
14802     const DataLayout &DL = DAG.getDataLayout();
14803 
14804     // Store the constants into memory as one consecutive store.
14805     if (IsConstantSrc) {
14806       while (NumConsecutiveStores >= 2) {
14807         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14808         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14809         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14810         unsigned LastLegalType = 1;
14811         unsigned LastLegalVectorType = 1;
14812         bool LastIntegerTrunc = false;
14813         bool NonZero = false;
14814         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
14815         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14816           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
14817           SDValue StoredVal = ST->getValue();
14818           bool IsElementZero = false;
14819           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
14820             IsElementZero = C->isNullValue();
14821           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
14822             IsElementZero = C->getConstantFPValue()->isNullValue();
14823           if (IsElementZero) {
14824             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
14825               FirstZeroAfterNonZero = i;
14826           }
14827           NonZero |= !IsElementZero;
14828 
14829           // Find a legal type for the constant store.
14830           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
14831           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
14832           bool IsFast = false;
14833 
14834           // Break early when size is too large to be legal.
14835           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
14836             break;
14837 
14838           if (TLI.isTypeLegal(StoreTy) &&
14839               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
14840               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14841                                      FirstStoreAlign, &IsFast) &&
14842               IsFast) {
14843             LastIntegerTrunc = false;
14844             LastLegalType = i + 1;
14845             // Or check whether a truncstore is legal.
14846           } else if (TLI.getTypeAction(Context, StoreTy) ==
14847                      TargetLowering::TypePromoteInteger) {
14848             EVT LegalizedStoredValTy =
14849                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
14850             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
14851                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
14852                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
14853                                        FirstStoreAlign, &IsFast) &&
14854                 IsFast) {
14855               LastIntegerTrunc = true;
14856               LastLegalType = i + 1;
14857             }
14858           }
14859 
14860           // We only use vectors if the constant is known to be zero or the
14861           // target allows it and the function is not marked with the
14862           // noimplicitfloat attribute.
14863           if ((!NonZero ||
14864                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
14865               !NoVectors) {
14866             // Find a legal type for the vector store.
14867             unsigned Elts = (i + 1) * NumMemElts;
14868             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
14869             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
14870                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14871                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14872                                        FirstStoreAlign, &IsFast) &&
14873                 IsFast)
14874               LastLegalVectorType = i + 1;
14875           }
14876         }
14877 
14878         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
14879         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
14880 
14881         // Check if we found a legal integer type that creates a meaningful
14882         // merge.
14883         if (NumElem < 2) {
14884           // We know that candidate stores are in order and of correct
14885           // shape. While there is no mergeable sequence from the
14886           // beginning one may start later in the sequence. The only
14887           // reason a merge of size N could have failed where another of
14888           // the same size would not have, is if the alignment has
14889           // improved or we've dropped a non-zero value. Drop as many
14890           // candidates as we can here.
14891           unsigned NumSkip = 1;
14892           while (
14893               (NumSkip < NumConsecutiveStores) &&
14894               (NumSkip < FirstZeroAfterNonZero) &&
14895               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14896             NumSkip++;
14897 
14898           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14899           NumConsecutiveStores -= NumSkip;
14900           continue;
14901         }
14902 
14903         // Check that we can merge these candidates without causing a cycle.
14904         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
14905                                                       RootNode)) {
14906           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14907           NumConsecutiveStores -= NumElem;
14908           continue;
14909         }
14910 
14911         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
14912                                               UseVector, LastIntegerTrunc);
14913 
14914         // Remove merged stores for next iteration.
14915         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
14916         NumConsecutiveStores -= NumElem;
14917       }
14918       continue;
14919     }
14920 
14921     // When extracting multiple vector elements, try to store them
14922     // in one vector store rather than a sequence of scalar stores.
14923     if (IsExtractVecSrc) {
14924       // Loop on Consecutive Stores on success.
14925       while (NumConsecutiveStores >= 2) {
14926         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14927         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
14928         unsigned FirstStoreAlign = FirstInChain->getAlignment();
14929         unsigned NumStoresToMerge = 1;
14930         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
14931           // Find a legal type for the vector store.
14932           unsigned Elts = (i + 1) * NumMemElts;
14933           EVT Ty =
14934               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14935           bool IsFast;
14936 
14937           // Break early when size is too large to be legal.
14938           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
14939             break;
14940 
14941           if (TLI.isTypeLegal(Ty) &&
14942               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
14943               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
14944                                      FirstStoreAlign, &IsFast) &&
14945               IsFast)
14946             NumStoresToMerge = i + 1;
14947         }
14948 
14949         // Check if we found a legal integer type creating a meaningful
14950         // merge.
14951         if (NumStoresToMerge < 2) {
14952           // We know that candidate stores are in order and of correct
14953           // shape. While there is no mergeable sequence from the
14954           // beginning one may start later in the sequence. The only
14955           // reason a merge of size N could have failed where another of
14956           // the same size would not have, is if the alignment has
14957           // improved. Drop as many candidates as we can here.
14958           unsigned NumSkip = 1;
14959           while (
14960               (NumSkip < NumConsecutiveStores) &&
14961               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
14962             NumSkip++;
14963 
14964           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
14965           NumConsecutiveStores -= NumSkip;
14966           continue;
14967         }
14968 
14969         // Check that we can merge these candidates without causing a cycle.
14970         if (!checkMergeStoreCandidatesForDependencies(
14971                 StoreNodes, NumStoresToMerge, RootNode)) {
14972           StoreNodes.erase(StoreNodes.begin(),
14973                            StoreNodes.begin() + NumStoresToMerge);
14974           NumConsecutiveStores -= NumStoresToMerge;
14975           continue;
14976         }
14977 
14978         RV |= MergeStoresOfConstantsOrVecElts(
14979             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
14980 
14981         StoreNodes.erase(StoreNodes.begin(),
14982                          StoreNodes.begin() + NumStoresToMerge);
14983         NumConsecutiveStores -= NumStoresToMerge;
14984       }
14985       continue;
14986     }
14987 
14988     // Below we handle the case of multiple consecutive stores that
14989     // come from multiple consecutive loads. We merge them into a single
14990     // wide load and a single wide store.
14991 
14992     // Look for load nodes which are used by the stored values.
14993     SmallVector<MemOpLink, 8> LoadNodes;
14994 
14995     // Find acceptable loads. Loads need to have the same chain (token factor),
14996     // must not be zext, volatile, indexed, and they must be consecutive.
14997     BaseIndexOffset LdBasePtr;
14998 
14999     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15000       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15001       SDValue Val = peekThroughBitcasts(St->getValue());
15002       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15003 
15004       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15005       // If this is not the first ptr that we check.
15006       int64_t LdOffset = 0;
15007       if (LdBasePtr.getBase().getNode()) {
15008         // The base ptr must be the same.
15009         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15010           break;
15011       } else {
15012         // Check that all other base pointers are the same as this one.
15013         LdBasePtr = LdPtr;
15014       }
15015 
15016       // We found a potential memory operand to merge.
15017       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15018     }
15019 
15020     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15021       // If we have load/store pair instructions and we only have two values,
15022       // don't bother merging.
15023       unsigned RequiredAlignment;
15024       if (LoadNodes.size() == 2 &&
15025           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15026           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15027         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15028         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15029         break;
15030       }
15031       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15032       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15033       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15034       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15035       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
15036       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15037 
15038       // Scan the memory operations on the chain and find the first
15039       // non-consecutive load memory address. These variables hold the index in
15040       // the store node array.
15041 
15042       unsigned LastConsecutiveLoad = 1;
15043 
15044       // This variable refers to the size and not index in the array.
15045       unsigned LastLegalVectorType = 1;
15046       unsigned LastLegalIntegerType = 1;
15047       bool isDereferenceable = true;
15048       bool DoIntegerTruncate = false;
15049       StartAddress = LoadNodes[0].OffsetFromBase;
15050       SDValue FirstChain = FirstLoad->getChain();
15051       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15052         // All loads must share the same chain.
15053         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15054           break;
15055 
15056         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15057         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15058           break;
15059         LastConsecutiveLoad = i;
15060 
15061         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15062           isDereferenceable = false;
15063 
15064         // Find a legal type for the vector store.
15065         unsigned Elts = (i + 1) * NumMemElts;
15066         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15067 
15068         // Break early when size is too large to be legal.
15069         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15070           break;
15071 
15072         bool IsFastSt, IsFastLd;
15073         if (TLI.isTypeLegal(StoreTy) &&
15074             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15075             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15076                                    FirstStoreAlign, &IsFastSt) &&
15077             IsFastSt &&
15078             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15079                                    FirstLoadAlign, &IsFastLd) &&
15080             IsFastLd) {
15081           LastLegalVectorType = i + 1;
15082         }
15083 
15084         // Find a legal type for the integer store.
15085         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15086         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15087         if (TLI.isTypeLegal(StoreTy) &&
15088             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15089             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15090                                    FirstStoreAlign, &IsFastSt) &&
15091             IsFastSt &&
15092             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15093                                    FirstLoadAlign, &IsFastLd) &&
15094             IsFastLd) {
15095           LastLegalIntegerType = i + 1;
15096           DoIntegerTruncate = false;
15097           // Or check whether a truncstore and extload is legal.
15098         } else if (TLI.getTypeAction(Context, StoreTy) ==
15099                    TargetLowering::TypePromoteInteger) {
15100           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15101           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15102               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15103               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15104                                  StoreTy) &&
15105               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15106                                  StoreTy) &&
15107               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15108               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15109                                      FirstStoreAlign, &IsFastSt) &&
15110               IsFastSt &&
15111               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15112                                      FirstLoadAlign, &IsFastLd) &&
15113               IsFastLd) {
15114             LastLegalIntegerType = i + 1;
15115             DoIntegerTruncate = true;
15116           }
15117         }
15118       }
15119 
15120       // Only use vector types if the vector type is larger than the integer
15121       // type. If they are the same, use integers.
15122       bool UseVectorTy =
15123           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15124       unsigned LastLegalType =
15125           std::max(LastLegalVectorType, LastLegalIntegerType);
15126 
15127       // We add +1 here because the LastXXX variables refer to location while
15128       // the NumElem refers to array/index size.
15129       unsigned NumElem =
15130           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15131       NumElem = std::min(LastLegalType, NumElem);
15132 
15133       if (NumElem < 2) {
15134         // We know that candidate stores are in order and of correct
15135         // shape. While there is no mergeable sequence from the
15136         // beginning one may start later in the sequence. The only
15137         // reason a merge of size N could have failed where another of
15138         // the same size would not have is if the alignment or either
15139         // the load or store has improved. Drop as many candidates as we
15140         // can here.
15141         unsigned NumSkip = 1;
15142         while ((NumSkip < LoadNodes.size()) &&
15143                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15144                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15145           NumSkip++;
15146         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15147         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15148         NumConsecutiveStores -= NumSkip;
15149         continue;
15150       }
15151 
15152       // Check that we can merge these candidates without causing a cycle.
15153       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15154                                                     RootNode)) {
15155         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15156         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15157         NumConsecutiveStores -= NumElem;
15158         continue;
15159       }
15160 
15161       // Find if it is better to use vectors or integers to load and store
15162       // to memory.
15163       EVT JointMemOpVT;
15164       if (UseVectorTy) {
15165         // Find a legal type for the vector store.
15166         unsigned Elts = NumElem * NumMemElts;
15167         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15168       } else {
15169         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15170         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15171       }
15172 
15173       SDLoc LoadDL(LoadNodes[0].MemNode);
15174       SDLoc StoreDL(StoreNodes[0].MemNode);
15175 
15176       // The merged loads are required to have the same incoming chain, so
15177       // using the first's chain is acceptable.
15178 
15179       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15180       AddToWorklist(NewStoreChain.getNode());
15181 
15182       MachineMemOperand::Flags MMOFlags =
15183           isDereferenceable ? MachineMemOperand::MODereferenceable
15184                             : MachineMemOperand::MONone;
15185 
15186       SDValue NewLoad, NewStore;
15187       if (UseVectorTy || !DoIntegerTruncate) {
15188         NewLoad =
15189             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15190                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15191                         FirstLoadAlign, MMOFlags);
15192         NewStore = DAG.getStore(
15193             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15194             FirstInChain->getPointerInfo(), FirstStoreAlign);
15195       } else { // This must be the truncstore/extload case
15196         EVT ExtendedTy =
15197             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15198         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15199                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15200                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15201                                  FirstLoadAlign, MMOFlags);
15202         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15203                                      FirstInChain->getBasePtr(),
15204                                      FirstInChain->getPointerInfo(),
15205                                      JointMemOpVT, FirstInChain->getAlignment(),
15206                                      FirstInChain->getMemOperand()->getFlags());
15207       }
15208 
15209       // Transfer chain users from old loads to the new load.
15210       for (unsigned i = 0; i < NumElem; ++i) {
15211         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15212         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15213                                       SDValue(NewLoad.getNode(), 1));
15214       }
15215 
      // Replace all of the stores with the new store. Recursively remove the
      // corresponding value if it is no longer used.
15218       for (unsigned i = 0; i < NumElem; ++i) {
15219         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15220         CombineTo(StoreNodes[i].MemNode, NewStore);
15221         if (Val.getNode()->use_empty())
15222           recursivelyDeleteUnusedNodes(Val.getNode());
15223       }
15224 
15225       RV = true;
15226       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15227       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15228       NumConsecutiveStores -= NumElem;
15229     }
15230   }
15231   return RV;
15232 }
15233 
15234 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15235   SDLoc SL(ST);
15236   SDValue ReplStore;
15237 
15238   // Replace the chain to avoid dependency.
15239   if (ST->isTruncatingStore()) {
15240     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15241                                   ST->getBasePtr(), ST->getMemoryVT(),
15242                                   ST->getMemOperand());
15243   } else {
15244     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15245                              ST->getMemOperand());
15246   }
15247 
15248   // Create token to keep both nodes around.
15249   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15250                               MVT::Other, ST->getChain(), ReplStore);
15251 
15252   // Make sure the new and old chains are cleaned up.
15253   AddToWorklist(Token.getNode());
15254 
15255   // Don't add users to work list.
15256   return CombineTo(ST, Token, false);
15257 }
15258 
15259 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15260   SDValue Value = ST->getValue();
15261   if (Value.getOpcode() == ISD::TargetConstantFP)
15262     return SDValue();
15263 
15264   SDLoc DL(ST);
15265 
15266   SDValue Chain = ST->getChain();
15267   SDValue Ptr = ST->getBasePtr();
15268 
15269   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15270 
15271   // NOTE: If the original store is volatile, this transform must not increase
15272   // the number of stores.  For example, on x86-32 an f64 can be stored in one
15273   // processor operation but an i64 (which is not legal) requires two.  So the
15274   // transform should not be done in this case.
15275 
15276   SDValue Tmp;
15277   switch (CFP->getSimpleValueType(0).SimpleTy) {
15278   default:
15279     llvm_unreachable("Unknown FP type");
15280   case MVT::f16:    // We don't do this for these yet.
15281   case MVT::f80:
15282   case MVT::f128:
15283   case MVT::ppcf128:
15284     return SDValue();
15285   case MVT::f32:
15286     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15287         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15288       ;
15289       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15290                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15291                             MVT::i32);
15292       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15293     }
15294 
15295     return SDValue();
15296   case MVT::f64:
15297     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15298          !ST->isVolatile()) ||
15299         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15300       ;
15301       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15302                             getZExtValue(), SDLoc(CFP), MVT::i64);
15303       return DAG.getStore(Chain, DL, Tmp,
15304                           Ptr, ST->getMemOperand());
15305     }
15306 
15307     if (!ST->isVolatile() &&
15308         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15309       // Many FP stores are not made apparent until after legalize, e.g. for
15310       // argument passing.  Since this is so common, custom legalize the
15311       // 64-bit integer store into two 32-bit stores.
15312       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15313       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15314       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15315       if (DAG.getDataLayout().isBigEndian())
15316         std::swap(Lo, Hi);
15317 
15318       unsigned Alignment = ST->getAlignment();
15319       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15320       AAMDNodes AAInfo = ST->getAAInfo();
15321 
15322       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15323                                  ST->getAlignment(), MMOFlags, AAInfo);
15324       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15325                         DAG.getConstant(4, DL, Ptr.getValueType()));
15326       Alignment = MinAlign(Alignment, 4U);
15327       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15328                                  ST->getPointerInfo().getWithOffset(4),
15329                                  Alignment, MMOFlags, AAInfo);
15330       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15331                          St0, St1);
15332     }
15333 
15334     return SDValue();
15335   }
15336 }
15337 
/// Combine a STORE node.
///
/// The folds below are attempted strictly in the order written, and the first
/// one that applies returns:
///   - store the source of a BITCAST directly;
///   - drop a store of undef;
///   - refine the store's alignment in place;
///   - turn an FP load/store pair into integer ops;
///   - move the store onto a better chain;
///   - narrow the value feeding a truncating store;
///   - remove stores that are dead with respect to a preceding load or store;
///   - fold a constant store into the constant of the preceding, covering
///     store;
///   - fold FP_ROUND/TRUNCATE into a truncating store;
///   - merge consecutive stores;
///   - form an indexed store;
///   - replace a store of an FP constant with an integer store;
///   - split a merged value store;
///   - reduce load-op-store width.
///
/// \returns a replacement value for \p N, SDValue(N, 0) when the helper that
/// fired already did the replacement / worklist bookkeeping itself, or an
/// empty SDValue.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only rewrite when the target both allows and reports as fast an access
      // of the source type at the store's existing alignment.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                            ST->getPointerInfo(), OrigAlign,
                            ST->getMemOperand()->getFlags(), ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        // The call is made for its side effect on the existing node: the
        // result is expected to be N itself (see the assert) and is otherwise
        // unused.
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // Re-read the chain from the node before the folds below use it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter = DAG.GetDemandedBits(
        Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                    ST->getMemoryVT().getScalarSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(
            Value,
            APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                                 ST->getMemoryVT().getScalarSizeInBits()))) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // Folds against the store that immediately precedes this one on the chain
  // (ST1). Both stores must be unindexed and non-volatile.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If the preceding store writes a subset of the location written by
        // this store, and no other node is chained to that preceding store, we
        // can effectively drop it. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          // ST1 is replaced by its own input chain; N itself is left as is.
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }

        // If ST stores to a subset of preceding store's write set, we may be
        // able to fold ST's value into the preceding stored value. As we know
        // the other uses of ST1's chain are unconcerned with ST, this folding
        // will not affect those nodes.
        int64_t BitOffset;
        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
                               BitOffset)) {
          SDValue ChainValue = ST1->getValue();
          // Only handled when both stored values are integer constants.
          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
              APInt Val = C1->getAPIntValue();
              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
              // FIXME: Handle Big-endian mode.
              if (!DAG.getDataLayout().isBigEndian()) {
                // Overwrite the covered bits of ST1's constant with ST's
                // constant, update ST1 to store the merged constant, and
                // replace ST with ST1.
                Val.insertBits(InsertVal, BitOffset);
                SDValue NewSDVal =
                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
                                    C1->isTargetOpcode(), C1->isOpaque());
                SDNode *NewST1 = DAG.UpdateNodeOperands(
                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
                    ST1->getOperand(3));
                return CombineTo(ST, SDValue(NewST1, 0));
              }
            }
          }
        } // End ST subset of ST1 case.
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
15550 
15551 /// For the instruction sequence of store below, F and I values
15552 /// are bundled together as an i64 value before being stored into memory.
15553 /// Sometimes it is more efficent to generate separate stores for F and I,
15554 /// which can remove the bitwise instructions or sink them to colder places.
15555 ///
15556 ///   (store (or (zext (bitcast F to i32) to i64),
15557 ///              (shl (zext I to i64), 32)), addr)  -->
15558 ///   (store F, addr) and (store I, addr+4)
15559 ///
15560 /// Similarly, splitting for other merged store can also be beneficial, like:
15561 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15562 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15563 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15564 /// For pair of {i16, i8},  i32 store --> two i16 stores.
15565 /// For pair of {i8, i8},   i16 store --> two i8 stores.
15566 ///
15567 /// We allow each target to determine specifically which kind of splitting is
15568 /// supported.
15569 ///
15570 /// The store patterns are commonly seen from the simple code snippet below
15571 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
15572 ///   void goo(const std::pair<int, float> &);
15573 ///   hoo() {
15574 ///     ...
15575 ///     goo(std::make_pair(tmp, ftmp));
15576 ///     ...
15577 ///   }
15578 ///
15579 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15580   if (OptLevel == CodeGenOpt::None)
15581     return SDValue();
15582 
15583   SDValue Val = ST->getValue();
15584   SDLoc DL(ST);
15585 
15586   // Match OR operand.
15587   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15588     return SDValue();
15589 
15590   // Match SHL operand and get Lower and Higher parts of Val.
15591   SDValue Op1 = Val.getOperand(0);
15592   SDValue Op2 = Val.getOperand(1);
15593   SDValue Lo, Hi;
15594   if (Op1.getOpcode() != ISD::SHL) {
15595     std::swap(Op1, Op2);
15596     if (Op1.getOpcode() != ISD::SHL)
15597       return SDValue();
15598   }
15599   Lo = Op2;
15600   Hi = Op1.getOperand(0);
15601   if (!Op1.hasOneUse())
15602     return SDValue();
15603 
15604   // Match shift amount to HalfValBitSize.
15605   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15606   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15607   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15608     return SDValue();
15609 
15610   // Lo and Hi are zero-extended from int with size less equal than 32
15611   // to i64.
15612   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15613       !Lo.getOperand(0).getValueType().isScalarInteger() ||
15614       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15615       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15616       !Hi.getOperand(0).getValueType().isScalarInteger() ||
15617       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15618     return SDValue();
15619 
15620   // Use the EVT of low and high parts before bitcast as the input
15621   // of target query.
15622   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15623                   ? Lo.getOperand(0).getValueType()
15624                   : Lo.getValueType();
15625   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15626                    ? Hi.getOperand(0).getValueType()
15627                    : Hi.getValueType();
15628   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15629     return SDValue();
15630 
15631   // Start to split store.
15632   unsigned Alignment = ST->getAlignment();
15633   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15634   AAMDNodes AAInfo = ST->getAAInfo();
15635 
15636   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15637   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15638   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15639   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15640 
15641   SDValue Chain = ST->getChain();
15642   SDValue Ptr = ST->getBasePtr();
15643   // Lower value store.
15644   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15645                              ST->getAlignment(), MMOFlags, AAInfo);
15646   Ptr =
15647       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15648                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15649   // Higher value store.
15650   SDValue St1 =
15651       DAG.getStore(St0, DL, Hi, Ptr,
15652                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15653                    Alignment / 2, MMOFlags, AAInfo);
15654   return St1;
15655 }
15656 
15657 /// Convert a disguised subvector insertion into a shuffle:
15658 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15659 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15660 /// Note: We do not use an insert_subvector node because that requires a legal
15661 /// subvector type.
15662 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15663   SDValue InsertVal = N->getOperand(1);
15664   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15665       !InsertVal.getOperand(0).getValueType().isVector())
15666     return SDValue();
15667 
15668   SDValue SubVec = InsertVal.getOperand(0);
15669   SDValue DestVec = N->getOperand(0);
15670   EVT SubVecVT = SubVec.getValueType();
15671   EVT VT = DestVec.getValueType();
15672   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15673   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15674   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15675 
15676   // Step 1: Create a shuffle mask that implements this insert operation. The
15677   // vector that we are inserting into will be operand 0 of the shuffle, so
15678   // those elements are just 'i'. The inserted subvector is in the first
15679   // positions of operand 1 of the shuffle. Example:
15680   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15681   SmallVector<int, 16> Mask(NumMaskVals);
15682   for (unsigned i = 0; i != NumMaskVals; ++i) {
15683     if (i / NumSrcElts == InsIndex)
15684       Mask[i] = (i % NumSrcElts) + NumMaskVals;
15685     else
15686       Mask[i] = i;
15687   }
15688 
15689   // Bail out if the target can not handle the shuffle we want to create.
15690   EVT SubVecEltVT = SubVecVT.getVectorElementType();
15691   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15692   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15693     return SDValue();
15694 
15695   // Step 2: Create a wide vector from the inserted source vector by appending
15696   // undefined elements. This is the same size as our destination vector.
15697   SDLoc DL(N);
15698   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15699   ConcatOps[0] = SubVec;
15700   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15701 
15702   // Step 3: Shuffle in the padded subvector.
15703   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15704   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15705   AddToWorklist(PaddedSubV.getNode());
15706   AddToWorklist(DestVecBC.getNode());
15707   AddToWorklist(Shuf.getNode());
15708   return DAG.getBitcast(VT, Shuf);
15709 }
15710 
15711 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15712   SDValue InVec = N->getOperand(0);
15713   SDValue InVal = N->getOperand(1);
15714   SDValue EltNo = N->getOperand(2);
15715   SDLoc DL(N);
15716 
15717   // If the inserted element is an UNDEF, just use the input vector.
15718   if (InVal.isUndef())
15719     return InVec;
15720 
15721   EVT VT = InVec.getValueType();
15722   unsigned NumElts = VT.getVectorNumElements();
15723 
15724   // Remove redundant insertions:
15725   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
15726   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15727       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
15728     return InVec;
15729 
15730   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
15731   if (!IndexC) {
15732     // If this is variable insert to undef vector, it might be better to splat:
15733     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
15734     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
15735       SmallVector<SDValue, 8> Ops(NumElts, InVal);
15736       return DAG.getBuildVector(VT, DL, Ops);
15737     }
15738     return SDValue();
15739   }
15740 
15741   // We must know which element is being inserted for folds below here.
15742   unsigned Elt = IndexC->getZExtValue();
15743   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
15744     return Shuf;
15745 
15746   // Canonicalize insert_vector_elt dag nodes.
15747   // Example:
15748   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
15749   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
15750   //
15751   // Do this only if the child insert_vector node has one use; also
15752   // do this only if indices are both constants and Idx1 < Idx0.
15753   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
15754       && isa<ConstantSDNode>(InVec.getOperand(2))) {
15755     unsigned OtherElt = InVec.getConstantOperandVal(2);
15756     if (Elt < OtherElt) {
15757       // Swap nodes.
15758       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
15759                                   InVec.getOperand(0), InVal, EltNo);
15760       AddToWorklist(NewOp.getNode());
15761       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
15762                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
15763     }
15764   }
15765 
15766   // If we can't generate a legal BUILD_VECTOR, exit
15767   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
15768     return SDValue();
15769 
15770   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
15771   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
15772   // vector elements.
15773   SmallVector<SDValue, 8> Ops;
15774   // Do not combine these two vectors if the output vector will not replace
15775   // the input vector.
15776   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
15777     Ops.append(InVec.getNode()->op_begin(),
15778                InVec.getNode()->op_end());
15779   } else if (InVec.isUndef()) {
15780     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
15781   } else {
15782     return SDValue();
15783   }
15784   assert(Ops.size() == NumElts && "Unexpected vector size");
15785 
15786   // Insert the element
15787   if (Elt < Ops.size()) {
15788     // All the operands of BUILD_VECTOR must have the same type;
15789     // we enforce that here.
15790     EVT OpVT = Ops[0].getValueType();
15791     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
15792   }
15793 
15794   // Return the new vector
15795   return DAG.getBuildVector(VT, DL, Ops);
15796 }
15797 
15798 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
15799                                                   SDValue EltNo,
15800                                                   LoadSDNode *OriginalLoad) {
15801   assert(!OriginalLoad->isVolatile());
15802 
15803   EVT ResultVT = EVE->getValueType(0);
15804   EVT VecEltVT = InVecVT.getVectorElementType();
15805   unsigned Align = OriginalLoad->getAlignment();
15806   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
15807       VecEltVT.getTypeForEVT(*DAG.getContext()));
15808 
15809   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
15810     return SDValue();
15811 
15812   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
15813     ISD::NON_EXTLOAD : ISD::EXTLOAD;
15814   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
15815     return SDValue();
15816 
15817   Align = NewAlign;
15818 
15819   SDValue NewPtr = OriginalLoad->getBasePtr();
15820   SDValue Offset;
15821   EVT PtrType = NewPtr.getValueType();
15822   MachinePointerInfo MPI;
15823   SDLoc DL(EVE);
15824   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
15825     int Elt = ConstEltNo->getZExtValue();
15826     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
15827     Offset = DAG.getConstant(PtrOff, DL, PtrType);
15828     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
15829   } else {
15830     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
15831     Offset = DAG.getNode(
15832         ISD::MUL, DL, PtrType, Offset,
15833         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
15834     // Discard the pointer info except the address space because the memory
15835     // operand can't represent this new access since the offset is variable.
15836     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
15837   }
15838   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
15839 
15840   // The replacement we need to do here is a little tricky: we need to
15841   // replace an extractelement of a load with a load.
15842   // Use ReplaceAllUsesOfValuesWith to do the replacement.
15843   // Note that this replacement assumes that the extractvalue is the only
15844   // use of the load; that's okay because we don't want to perform this
15845   // transformation in other cases anyway.
15846   SDValue Load;
15847   SDValue Chain;
15848   if (ResultVT.bitsGT(VecEltVT)) {
15849     // If the result type of vextract is wider than the load, then issue an
15850     // extending load instead.
15851     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
15852                                                   VecEltVT)
15853                                    ? ISD::ZEXTLOAD
15854                                    : ISD::EXTLOAD;
15855     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
15856                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
15857                           Align, OriginalLoad->getMemOperand()->getFlags(),
15858                           OriginalLoad->getAAInfo());
15859     Chain = Load.getValue(1);
15860   } else {
15861     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
15862                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
15863                        OriginalLoad->getAAInfo());
15864     Chain = Load.getValue(1);
15865     if (ResultVT.bitsLT(VecEltVT))
15866       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
15867     else
15868       Load = DAG.getBitcast(ResultVT, Load);
15869   }
15870   WorklistRemover DeadNodes(*this);
15871   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
15872   SDValue To[] = { Load, Chain };
15873   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
15874   // Since we're explicitly calling ReplaceAllUses, add the new node to the
15875   // worklist explicitly as well.
15876   AddToWorklist(Load.getNode());
15877   AddUsersToWorklist(Load.getNode()); // Add users too
15878   // Make sure to revisit this node to clean it up; it will usually be dead.
15879   AddToWorklist(EVE);
15880   ++OpsNarrowed;
15881   return SDValue(EVE, 0);
15882 }
15883 
15884 /// Transform a vector binary operation into a scalar binary operation by moving
15885 /// the math/logic after an extract element of a vector.
15886 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
15887                                        bool LegalOperations) {
15888   SDValue Vec = ExtElt->getOperand(0);
15889   SDValue Index = ExtElt->getOperand(1);
15890   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15891   if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
15892     return SDValue();
15893 
15894   // Targets may want to avoid this to prevent an expensive register transfer.
15895   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15896   if (!TLI.shouldScalarizeBinop(Vec))
15897     return SDValue();
15898 
15899   // Extracting an element of a vector constant is constant-folded, so this
15900   // transform is just replacing a vector op with a scalar op while moving the
15901   // extract.
15902   SDValue Op0 = Vec.getOperand(0);
15903   SDValue Op1 = Vec.getOperand(1);
15904   if (isAnyConstantBuildVector(Op0, true) ||
15905       isAnyConstantBuildVector(Op1, true)) {
15906     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
15907     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
15908     SDLoc DL(ExtElt);
15909     EVT VT = ExtElt->getValueType(0);
15910     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
15911     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
15912     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
15913   }
15914 
15915   return SDValue();
15916 }
15917 
15918 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
15919   SDValue VecOp = N->getOperand(0);
15920   SDValue Index = N->getOperand(1);
15921   EVT ScalarVT = N->getValueType(0);
15922   EVT VecVT = VecOp.getValueType();
15923   if (VecOp.isUndef())
15924     return DAG.getUNDEF(ScalarVT);
15925 
15926   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
15927   //
15928   // This only really matters if the index is non-constant since other combines
15929   // on the constant elements already work.
15930   SDLoc DL(N);
15931   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
15932       Index == VecOp.getOperand(2)) {
15933     SDValue Elt = VecOp.getOperand(1);
15934     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
15935   }
15936 
15937   // (vextract (scalar_to_vector val, 0) -> val
15938   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15939     // Check if the result type doesn't match the inserted element type. A
15940     // SCALAR_TO_VECTOR may truncate the inserted element and the
15941     // EXTRACT_VECTOR_ELT may widen the extracted vector.
15942     SDValue InOp = VecOp.getOperand(0);
15943     if (InOp.getValueType() != ScalarVT) {
15944       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
15945       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
15946     }
15947     return InOp;
15948   }
15949 
15950   // extract_vector_elt of out-of-bounds element -> UNDEF
15951   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
15952   unsigned NumElts = VecVT.getVectorNumElements();
15953   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
15954     return DAG.getUNDEF(ScalarVT);
15955 
15956   // extract_vector_elt (build_vector x, y), 1 -> y
15957   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
15958       TLI.isTypeLegal(VecVT) &&
15959       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
15960     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
15961     EVT InEltVT = Elt.getValueType();
15962 
15963     // Sometimes build_vector's scalar input types do not match result type.
15964     if (ScalarVT == InEltVT)
15965       return Elt;
15966 
15967     // TODO: It may be useful to truncate if free if the build_vector implicitly
15968     // converts.
15969   }
15970 
15971   // TODO: These transforms should not require the 'hasOneUse' restriction, but
15972   // there are regressions on multiple targets without it. We can end up with a
15973   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
15974   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
15975       VecOp.hasOneUse()) {
15976     // The vector index of the LSBs of the source depend on the endian-ness.
15977     bool IsLE = DAG.getDataLayout().isLittleEndian();
15978     unsigned ExtractIndex = IndexC->getZExtValue();
15979     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
15980     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
15981     SDValue BCSrc = VecOp.getOperand(0);
15982     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
15983       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
15984 
15985     if (LegalTypes && BCSrc.getValueType().isInteger() &&
15986         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
15987       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
15988       // trunc i64 X to i32
15989       SDValue X = BCSrc.getOperand(0);
15990       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
15991              "Extract element and scalar to vector can't change element type "
15992              "from FP to integer.");
15993       unsigned XBitWidth = X.getValueSizeInBits();
15994       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
15995       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
15996 
15997       // An extract element return value type can be wider than its vector
15998       // operand element type. In that case, the high bits are undefined, so
15999       // it's possible that we may need to extend rather than truncate.
16000       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16001         assert(XBitWidth % VecEltBitWidth == 0 &&
16002                "Scalar bitwidth must be a multiple of vector element bitwidth");
16003         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16004       }
16005     }
16006   }
16007 
16008   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16009     return BO;
16010 
16011   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16012   // We only perform this optimization before the op legalization phase because
16013   // we may introduce new vector instructions which are not backed by TD
16014   // patterns. For example on AVX, extracting elements from a wide vector
16015   // without using extract_subvector. However, if we can find an underlying
16016   // scalar value, then we can always use that.
16017   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16018     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16019     // Find the new index to extract from.
16020     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16021 
16022     // Extracting an undef index is undef.
16023     if (OrigElt == -1)
16024       return DAG.getUNDEF(ScalarVT);
16025 
16026     // Select the right vector half to extract from.
16027     SDValue SVInVec;
16028     if (OrigElt < (int)NumElts) {
16029       SVInVec = VecOp.getOperand(0);
16030     } else {
16031       SVInVec = VecOp.getOperand(1);
16032       OrigElt -= NumElts;
16033     }
16034 
16035     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16036       SDValue InOp = SVInVec.getOperand(OrigElt);
16037       if (InOp.getValueType() != ScalarVT) {
16038         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16039         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16040       }
16041 
16042       return InOp;
16043     }
16044 
16045     // FIXME: We should handle recursing on other vector shuffles and
16046     // scalar_to_vector here as well.
16047 
16048     if (!LegalOperations ||
16049         // FIXME: Should really be just isOperationLegalOrCustom.
16050         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16051         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16052       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16053       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16054                          DAG.getConstant(OrigElt, DL, IndexTy));
16055     }
16056   }
16057 
16058   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16059   // simplify it based on the (valid) extraction indices.
16060   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16061         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16062                Use->getOperand(0) == VecOp &&
16063                isa<ConstantSDNode>(Use->getOperand(1));
16064       })) {
16065     APInt DemandedElts = APInt::getNullValue(NumElts);
16066     for (SDNode *Use : VecOp->uses()) {
16067       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16068       if (CstElt->getAPIntValue().ult(NumElts))
16069         DemandedElts.setBit(CstElt->getZExtValue());
16070     }
16071     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16072       // We simplified the vector operand of this extract element. If this
16073       // extract is not dead, visit it again so it is folded properly.
16074       if (N->getOpcode() != ISD::DELETED_NODE)
16075         AddToWorklist(N);
16076       return SDValue(N, 0);
16077     }
16078   }
16079 
16080   // Everything under here is trying to match an extract of a loaded value.
16081   // If the result of load has to be truncated, then it's not necessarily
16082   // profitable.
16083   bool BCNumEltsChanged = false;
16084   EVT ExtVT = VecVT.getVectorElementType();
16085   EVT LVT = ExtVT;
16086   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16087     return SDValue();
16088 
16089   if (VecOp.getOpcode() == ISD::BITCAST) {
16090     // Don't duplicate a load with other uses.
16091     if (!VecOp.hasOneUse())
16092       return SDValue();
16093 
16094     EVT BCVT = VecOp.getOperand(0).getValueType();
16095     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16096       return SDValue();
16097     if (NumElts != BCVT.getVectorNumElements())
16098       BCNumEltsChanged = true;
16099     VecOp = VecOp.getOperand(0);
16100     ExtVT = BCVT.getVectorElementType();
16101   }
16102 
16103   // extract (vector load $addr), i --> load $addr + i * size
16104   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16105       ISD::isNormalLoad(VecOp.getNode()) &&
16106       !Index->hasPredecessor(VecOp.getNode())) {
16107     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16108     if (VecLoad && !VecLoad->isVolatile())
16109       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16110   }
16111 
16112   // Perform only after legalization to ensure build_vector / vector_shuffle
16113   // optimizations have already been done.
16114   if (!LegalOperations || !IndexC)
16115     return SDValue();
16116 
16117   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16118   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16119   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16120   int Elt = IndexC->getZExtValue();
16121   LoadSDNode *LN0 = nullptr;
16122   if (ISD::isNormalLoad(VecOp.getNode())) {
16123     LN0 = cast<LoadSDNode>(VecOp);
16124   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16125              VecOp.getOperand(0).getValueType() == ExtVT &&
16126              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16127     // Don't duplicate a load with other uses.
16128     if (!VecOp.hasOneUse())
16129       return SDValue();
16130 
16131     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16132   }
16133   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16134     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16135     // =>
16136     // (load $addr+1*size)
16137 
16138     // Don't duplicate a load with other uses.
16139     if (!VecOp.hasOneUse())
16140       return SDValue();
16141 
16142     // If the bit convert changed the number of elements, it is unsafe
16143     // to examine the mask.
16144     if (BCNumEltsChanged)
16145       return SDValue();
16146 
16147     // Select the input vector, guarding against out of range extract vector.
16148     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16149     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16150 
16151     if (VecOp.getOpcode() == ISD::BITCAST) {
16152       // Don't duplicate a load with other uses.
16153       if (!VecOp.hasOneUse())
16154         return SDValue();
16155 
16156       VecOp = VecOp.getOperand(0);
16157     }
16158     if (ISD::isNormalLoad(VecOp.getNode())) {
16159       LN0 = cast<LoadSDNode>(VecOp);
16160       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16161       Index = DAG.getConstant(Elt, DL, Index.getValueType());
16162     }
16163   }
16164 
16165   // Make sure we found a non-volatile load and the extractelement is
16166   // the only use.
16167   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
16168     return SDValue();
16169 
16170   // If Idx was -1 above, Elt is going to be -1, so just return undef.
16171   if (Elt == -1)
16172     return DAG.getUNDEF(LVT);
16173 
16174   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16175 }
16176 
16177 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
16178 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16179   // We perform this optimization post type-legalization because
16180   // the type-legalizer often scalarizes integer-promoted vectors.
16181   // Performing this optimization before may create bit-casts which
16182   // will be type-legalized to complex code sequences.
16183   // We perform this optimization only before the operation legalizer because we
16184   // may introduce illegal operations.
16185   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16186     return SDValue();
16187 
16188   unsigned NumInScalars = N->getNumOperands();
16189   SDLoc DL(N);
16190   EVT VT = N->getValueType(0);
16191 
16192   // Check to see if this is a BUILD_VECTOR of a bunch of values
16193   // which come from any_extend or zero_extend nodes. If so, we can create
16194   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16195   // optimizations. We do not handle sign-extend because we can't fill the sign
16196   // using shuffles.
16197   EVT SourceType = MVT::Other;
16198   bool AllAnyExt = true;
16199 
16200   for (unsigned i = 0; i != NumInScalars; ++i) {
16201     SDValue In = N->getOperand(i);
16202     // Ignore undef inputs.
16203     if (In.isUndef()) continue;
16204 
16205     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
16206     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16207 
16208     // Abort if the element is not an extension.
16209     if (!ZeroExt && !AnyExt) {
16210       SourceType = MVT::Other;
16211       break;
16212     }
16213 
16214     // The input is a ZeroExt or AnyExt. Check the original type.
16215     EVT InTy = In.getOperand(0).getValueType();
16216 
16217     // Check that all of the widened source types are the same.
16218     if (SourceType == MVT::Other)
16219       // First time.
16220       SourceType = InTy;
16221     else if (InTy != SourceType) {
16222       // Multiple income types. Abort.
16223       SourceType = MVT::Other;
16224       break;
16225     }
16226 
16227     // Check if all of the extends are ANY_EXTENDs.
16228     AllAnyExt &= AnyExt;
16229   }
16230 
16231   // In order to have valid types, all of the inputs must be extended from the
16232   // same source type and all of the inputs must be any or zero extend.
16233   // Scalar sizes must be a power of two.
16234   EVT OutScalarTy = VT.getScalarType();
16235   bool ValidTypes = SourceType != MVT::Other &&
16236                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16237                  isPowerOf2_32(SourceType.getSizeInBits());
16238 
16239   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16240   // turn into a single shuffle instruction.
16241   if (!ValidTypes)
16242     return SDValue();
16243 
16244   bool isLE = DAG.getDataLayout().isLittleEndian();
16245   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16246   assert(ElemRatio > 1 && "Invalid element size ratio");
16247   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16248                                DAG.getConstant(0, DL, SourceType);
16249 
16250   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16251   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16252 
16253   // Populate the new build_vector
16254   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16255     SDValue Cast = N->getOperand(i);
16256     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16257             Cast.getOpcode() == ISD::ZERO_EXTEND ||
16258             Cast.isUndef()) && "Invalid cast opcode");
16259     SDValue In;
16260     if (Cast.isUndef())
16261       In = DAG.getUNDEF(SourceType);
16262     else
16263       In = Cast->getOperand(0);
16264     unsigned Index = isLE ? (i * ElemRatio) :
16265                             (i * ElemRatio + (ElemRatio - 1));
16266 
16267     assert(Index < Ops.size() && "Invalid index");
16268     Ops[Index] = In;
16269   }
16270 
16271   // The type of the new BUILD_VECTOR node.
16272   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16273   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16274          "Invalid vector size");
16275   // Check if the new vector type is legal.
16276   if (!isTypeLegal(VecVT) ||
16277       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16278        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16279     return SDValue();
16280 
16281   // Make the new BUILD_VECTOR.
16282   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16283 
16284   // The new BUILD_VECTOR node has the potential to be further optimized.
16285   AddToWorklist(BV.getNode());
16286   // Bitcast to the desired type.
16287   return DAG.getBitcast(VT, BV);
16288 }
16289 
/// Build a VECTOR_SHUFFLE (optionally followed by an EXTRACT_SUBVECTOR) that
/// produces the lanes of build vector \p N which come from \p VecIn1 and
/// \p VecIn2.
///
/// \param DL         Debug location used for every node created here.
/// \param N          The node being replaced; its value type is the result
///                   type. For each lane i with VectorMask[i] > 0, operand 1
///                   of N's i-th operand is read as a constant index.
/// \param VectorMask Per output lane, the number of the input vector the lane
///                   comes from. Lanes with an entry <= 0 are left undef in
///                   the shuffle mask built here.
/// \param VecIn1     First shuffle input (vector number \p LeftIdx).
/// \param VecIn2     Second shuffle input (vector number \p LeftIdx + 1); may
///                   be a null SDValue.
/// \param LeftIdx    Vector number that corresponds to \p VecIn1.
/// \returns The shuffle (or the subvector extracted from it), or an empty
///          SDValue if the input/output type combination is not handled or a
///          target legality/cost check fails.
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                           ArrayRef<int> VectorMask,
                                           SDValue VecIn1, SDValue VecIn2,
                                           unsigned LeftIdx) {
  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);

  EVT VT = N->getValueType(0);
  EVT InVT1 = VecIn1.getValueType();
  // A missing second input is treated as having the first input's type.
  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;

  // Vec2Offset is added to extract indices that refer to VecIn2 when forming
  // the shuffle mask. ShuffleNumElems is the lane count of the shuffle itself,
  // which may end up wider than the result.
  unsigned Vec2Offset = 0;
  unsigned NumElems = VT.getVectorNumElements();
  unsigned ShuffleNumElems = NumElems;

  // In case both the input vectors are extracted from same base
  // vector we do not need extra addend (Vec2Offset) while
  // computing shuffle mask.
  if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
      !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
    Vec2Offset = InVT1.getVectorNumElements();

  // We can't generate a shuffle node with mismatched input and output types.
  // Try to make the types match the type of the output.
  if (InVT1 != VT || InVT2 != VT) {
    if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of both input lengths,
      // we can concatenate them and pad the rest with undefs.
      unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
      assert(NumConcats >= 2 && "Concat needs at least two inputs!");
      SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
      ConcatOps[0] = VecIn1;
      ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
      // Both inputs now live in one VT-sized vector; the shuffle becomes
      // single-input.
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
      VecIn2 = SDValue();
    } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
      // The first input is exactly twice as wide as the output; only proceed
      // if the target says extracting a subvector of it is cheap.
      if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
        return SDValue();

      if (!VecIn2.getNode()) {
        // If we only have one input vector, and it's twice the size of the
        // output, split it in two.
        VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
                             DAG.getConstant(NumElems, DL, IdxTy));
        VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
        // Since we now have shorter input vectors, adjust the offset of the
        // second vector's start.
        Vec2Offset = NumElems;
      } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
        // VecIn1 is wider than the output, and we have another, possibly
        // smaller input. Pad the smaller input with undefs, shuffle at the
        // input vector width, and extract the output.
        // The shuffle type is different than VT, so check legality again.
        if (LegalOperations &&
            !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
          return SDValue();

        // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
        // lower it back into a BUILD_VECTOR. So if the inserted type is
        // illegal, don't even try.
        if (InVT1 != InVT2) {
          if (!TLI.isTypeLegal(InVT2))
            return SDValue();
          VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
                               DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
        }
        // The shuffle is performed at the wider input width; the result is
        // extracted from its low half at the end of this function.
        ShuffleNumElems = NumElems * 2;
      } else {
        // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
        // than VecIn1. We can't handle this for now - this case will disappear
        // when we start sorting the vectors by type.
        return SDValue();
      }
    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
               InVT1.getSizeInBits() == VT.getSizeInBits()) {
      // VecIn1 is already output-sized and VecIn2 is half that size: widen
      // VecIn2 to VT by concatenating it with undef.
      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
      ConcatOps[0] = VecIn2;
      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
    } else {
      // TODO: Support cases where the length mismatch isn't exactly by a
      // factor of 2.
      // TODO: Move this check upwards, so that if we have bad type
      // mismatches, we don't create any DAG nodes.
      return SDValue();
    }
  }

  // Initialize mask to undef.
  SmallVector<int, 8> Mask(ShuffleNumElems, -1);

  // Only need to run up to the number of elements actually used, not the
  // total number of elements in the shuffle - if we are shuffling a wider
  // vector, the high lanes should be set to undef.
  for (unsigned i = 0; i != NumElems; ++i) {
    // Entries <= 0 do not refer to an input vector; leave those lanes undef.
    if (VectorMask[i] <= 0)
      continue;

    // Lanes from VecIn1 use the extract index directly; lanes from VecIn2 are
    // shifted by Vec2Offset. Lanes that refer to any other vector number are
    // left undef here.
    unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
    if (VectorMask[i] == (int)LeftIdx) {
      Mask[i] = ExtIndex;
    } else if (VectorMask[i] == (int)LeftIdx + 1) {
      Mask[i] = Vec2Offset + ExtIndex;
    }
  }

  // The type the input vectors may have changed above.
  InVT1 = VecIn1.getValueType();

  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, use an undef vector of the appropriate type.
  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");

  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
  // If the shuffle was done at a wider width, the result is its low subvector.
  if (ShuffleNumElems > NumElems)
    Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);

  return Shuffle;
}
16410 
16411 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16412   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16413 
16414   // First, determine where the build vector is not undef.
16415   // TODO: We could extend this to handle zero elements as well as undefs.
16416   int NumBVOps = BV->getNumOperands();
16417   int ZextElt = -1;
16418   for (int i = 0; i != NumBVOps; ++i) {
16419     SDValue Op = BV->getOperand(i);
16420     if (Op.isUndef())
16421       continue;
16422     if (ZextElt == -1)
16423       ZextElt = i;
16424     else
16425       return SDValue();
16426   }
16427   // Bail out if there's no non-undef element.
16428   if (ZextElt == -1)
16429     return SDValue();
16430 
16431   // The build vector contains some number of undef elements and exactly
16432   // one other element. That other element must be a zero-extended scalar
16433   // extracted from a vector at a constant index to turn this into a shuffle.
16434   // Also, require that the build vector does not implicitly truncate/extend
16435   // its elements.
16436   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
16437   EVT VT = BV->getValueType(0);
16438   SDValue Zext = BV->getOperand(ZextElt);
16439   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16440       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16441       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16442       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16443     return SDValue();
16444 
16445   // The zero-extend must be a multiple of the source size, and we must be
16446   // building a vector of the same size as the source of the extract element.
16447   SDValue Extract = Zext.getOperand(0);
16448   unsigned DestSize = Zext.getValueSizeInBits();
16449   unsigned SrcSize = Extract.getValueSizeInBits();
16450   if (DestSize % SrcSize != 0 ||
16451       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16452     return SDValue();
16453 
16454   // Create a shuffle mask that will combine the extracted element with zeros
16455   // and undefs.
16456   int ZextRatio = DestSize / SrcSize;
16457   int NumMaskElts = NumBVOps * ZextRatio;
16458   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16459   for (int i = 0; i != NumMaskElts; ++i) {
16460     if (i / ZextRatio == ZextElt) {
16461       // The low bits of the (potentially translated) extracted element map to
16462       // the source vector. The high bits map to zero. We will use a zero vector
16463       // as the 2nd source operand of the shuffle, so use the 1st element of
16464       // that vector (mask value is number-of-elements) for the high bits.
16465       if (i % ZextRatio == 0)
16466         ShufMask[i] = Extract.getConstantOperandVal(1);
16467       else
16468         ShufMask[i] = NumMaskElts;
16469     }
16470 
16471     // Undef elements of the build vector remain undef because we initialize
16472     // the shuffle mask with -1.
16473   }
16474 
16475   // Turn this into a shuffle with zero if that's legal.
16476   EVT VecVT = Extract.getOperand(0).getValueType();
16477   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16478     return SDValue();
16479 
16480   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16481   // bitcast (shuffle V, ZeroVec, VectorMask)
16482   SDLoc DL(BV);
16483   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16484   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16485                                       ShufMask);
16486   return DAG.getBitcast(VT, Shuf);
16487 }
16488 
16489 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16490 // operations. If the types of the vectors we're extracting from allow it,
16491 // turn this into a vector_shuffle node.
16492 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16493   SDLoc DL(N);
16494   EVT VT = N->getValueType(0);
16495 
16496   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16497   if (!isTypeLegal(VT))
16498     return SDValue();
16499 
16500   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16501     return V;
16502 
16503   // May only combine to shuffle after legalize if shuffle is legal.
16504   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16505     return SDValue();
16506 
16507   bool UsesZeroVector = false;
16508   unsigned NumElems = N->getNumOperands();
16509 
16510   // Record, for each element of the newly built vector, which input vector
16511   // that element comes from. -1 stands for undef, 0 for the zero vector,
16512   // and positive values for the input vectors.
16513   // VectorMask maps each element to its vector number, and VecIn maps vector
16514   // numbers to their initial SDValues.
16515 
16516   SmallVector<int, 8> VectorMask(NumElems, -1);
16517   SmallVector<SDValue, 8> VecIn;
16518   VecIn.push_back(SDValue());
16519 
16520   for (unsigned i = 0; i != NumElems; ++i) {
16521     SDValue Op = N->getOperand(i);
16522 
16523     if (Op.isUndef())
16524       continue;
16525 
16526     // See if we can use a blend with a zero vector.
16527     // TODO: Should we generalize this to a blend with an arbitrary constant
16528     // vector?
16529     if (isNullConstant(Op) || isNullFPConstant(Op)) {
16530       UsesZeroVector = true;
16531       VectorMask[i] = 0;
16532       continue;
16533     }
16534 
16535     // Not an undef or zero. If the input is something other than an
16536     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16537     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16538         !isa<ConstantSDNode>(Op.getOperand(1)))
16539       return SDValue();
16540     SDValue ExtractedFromVec = Op.getOperand(0);
16541 
16542     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
16543     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16544       return SDValue();
16545 
16546     // All inputs must have the same element type as the output.
16547     if (VT.getVectorElementType() !=
16548         ExtractedFromVec.getValueType().getVectorElementType())
16549       return SDValue();
16550 
16551     // Have we seen this input vector before?
16552     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16553     // a map back from SDValues to numbers isn't worth it.
16554     unsigned Idx = std::distance(
16555         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16556     if (Idx == VecIn.size())
16557       VecIn.push_back(ExtractedFromVec);
16558 
16559     VectorMask[i] = Idx;
16560   }
16561 
16562   // If we didn't find at least one input vector, bail out.
16563   if (VecIn.size() < 2)
16564     return SDValue();
16565 
16566   // If all the Operands of BUILD_VECTOR extract from same
16567   // vector, then split the vector efficiently based on the maximum
16568   // vector access index and adjust the VectorMask and
16569   // VecIn accordingly.
16570   if (VecIn.size() == 2) {
16571     unsigned MaxIndex = 0;
16572     unsigned NearestPow2 = 0;
16573     SDValue Vec = VecIn.back();
16574     EVT InVT = Vec.getValueType();
16575     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16576     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16577 
16578     for (unsigned i = 0; i < NumElems; i++) {
16579       if (VectorMask[i] <= 0)
16580         continue;
16581       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16582       IndexVec[i] = Index;
16583       MaxIndex = std::max(MaxIndex, Index);
16584     }
16585 
16586     NearestPow2 = PowerOf2Ceil(MaxIndex);
16587     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16588         NumElems * 2 < NearestPow2) {
16589       unsigned SplitSize = NearestPow2 / 2;
16590       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16591                                      InVT.getVectorElementType(), SplitSize);
16592       if (TLI.isTypeLegal(SplitVT)) {
16593         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16594                                      DAG.getConstant(SplitSize, DL, IdxTy));
16595         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16596                                      DAG.getConstant(0, DL, IdxTy));
16597         VecIn.pop_back();
16598         VecIn.push_back(VecIn1);
16599         VecIn.push_back(VecIn2);
16600 
16601         for (unsigned i = 0; i < NumElems; i++) {
16602           if (VectorMask[i] <= 0)
16603             continue;
16604           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16605         }
16606       }
16607     }
16608   }
16609 
16610   // TODO: We want to sort the vectors by descending length, so that adjacent
16611   // pairs have similar length, and the longer vector is always first in the
16612   // pair.
16613 
16614   // TODO: Should this fire if some of the input vectors has illegal type (like
16615   // it does now), or should we let legalization run its course first?
16616 
16617   // Shuffle phase:
16618   // Take pairs of vectors, and shuffle them so that the result has elements
16619   // from these vectors in the correct places.
16620   // For example, given:
16621   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16622   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16623   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16624   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16625   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16626   // We will generate:
16627   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16628   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16629   SmallVector<SDValue, 4> Shuffles;
16630   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16631     unsigned LeftIdx = 2 * In + 1;
16632     SDValue VecLeft = VecIn[LeftIdx];
16633     SDValue VecRight =
16634         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16635 
16636     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16637                                                 VecRight, LeftIdx))
16638       Shuffles.push_back(Shuffle);
16639     else
16640       return SDValue();
16641   }
16642 
16643   // If we need the zero vector as an "ingredient" in the blend tree, add it
16644   // to the list of shuffles.
16645   if (UsesZeroVector)
16646     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16647                                       : DAG.getConstantFP(0.0, DL, VT));
16648 
16649   // If we only have one shuffle, we're done.
16650   if (Shuffles.size() == 1)
16651     return Shuffles[0];
16652 
16653   // Update the vector mask to point to the post-shuffle vectors.
16654   for (int &Vec : VectorMask)
16655     if (Vec == 0)
16656       Vec = Shuffles.size() - 1;
16657     else
16658       Vec = (Vec - 1) / 2;
16659 
16660   // More than one shuffle. Generate a binary tree of blends, e.g. if from
16661   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16662   // generate:
16663   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16664   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16665   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16666   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16667   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16668   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16669   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16670 
16671   // Make sure the initial size of the shuffle list is even.
16672   if (Shuffles.size() % 2)
16673     Shuffles.push_back(DAG.getUNDEF(VT));
16674 
16675   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16676     if (CurSize % 2) {
16677       Shuffles[CurSize] = DAG.getUNDEF(VT);
16678       CurSize++;
16679     }
16680     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16681       int Left = 2 * In;
16682       int Right = 2 * In + 1;
16683       SmallVector<int, 8> Mask(NumElems, -1);
16684       for (unsigned i = 0; i != NumElems; ++i) {
16685         if (VectorMask[i] == Left) {
16686           Mask[i] = i;
16687           VectorMask[i] = In;
16688         } else if (VectorMask[i] == Right) {
16689           Mask[i] = i + NumElems;
16690           VectorMask[i] = In;
16691         }
16692       }
16693 
16694       Shuffles[In] =
16695           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16696     }
16697   }
16698   return Shuffles[0];
16699 }
16700 
16701 // Try to turn a build vector of zero extends of extract vector elts into a
16702 // a vector zero extend and possibly an extract subvector.
16703 // TODO: Support sign extend or any extend?
16704 // TODO: Allow undef elements?
16705 // TODO: Don't require the extracts to start at element 0.
16706 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16707   if (LegalOperations)
16708     return SDValue();
16709 
16710   EVT VT = N->getValueType(0);
16711 
16712   SDValue Op0 = N->getOperand(0);
16713   auto checkElem = [&](SDValue Op) -> int64_t {
16714     if (Op.getOpcode() == ISD::ZERO_EXTEND &&
16715         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16716         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
16717       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
16718         return C->getZExtValue();
16719     return -1;
16720   };
16721 
16722   // Make sure the first element matches
16723   // (zext (extract_vector_elt X, C))
16724   int64_t Offset = checkElem(Op0);
16725   if (Offset < 0)
16726     return SDValue();
16727 
16728   unsigned NumElems = N->getNumOperands();
16729   SDValue In = Op0.getOperand(0).getOperand(0);
16730   EVT InSVT = In.getValueType().getScalarType();
16731   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
16732 
16733   // Don't create an illegal input type after type legalization.
16734   if (LegalTypes && !TLI.isTypeLegal(InVT))
16735     return SDValue();
16736 
16737   // Ensure all the elements come from the same vector and are adjacent.
16738   for (unsigned i = 1; i != NumElems; ++i) {
16739     if ((Offset + i) != checkElem(N->getOperand(i)))
16740       return SDValue();
16741   }
16742 
16743   SDLoc DL(N);
16744   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
16745                    Op0.getOperand(0).getOperand(1));
16746   return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
16747 }
16748 
16749 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
16750   EVT VT = N->getValueType(0);
16751 
16752   // A vector built entirely of undefs is undef.
16753   if (ISD::allOperandsUndef(N))
16754     return DAG.getUNDEF(VT);
16755 
16756   // If this is a splat of a bitcast from another vector, change to a
16757   // concat_vector.
16758   // For example:
16759   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
16760   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
16761   //
16762   // If X is a build_vector itself, the concat can become a larger build_vector.
16763   // TODO: Maybe this is useful for non-splat too?
16764   if (!LegalOperations) {
16765     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
16766       Splat = peekThroughBitcasts(Splat);
16767       EVT SrcVT = Splat.getValueType();
16768       if (SrcVT.isVector()) {
16769         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
16770         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
16771                                      SrcVT.getVectorElementType(), NumElts);
16772         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
16773           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
16774           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
16775                                        NewVT, Ops);
16776           return DAG.getBitcast(VT, Concat);
16777         }
16778       }
16779     }
16780   }
16781 
16782   // Check if we can express BUILD VECTOR via subvector extract.
16783   if (!LegalTypes && (N->getNumOperands() > 1)) {
16784     SDValue Op0 = N->getOperand(0);
16785     auto checkElem = [&](SDValue Op) -> uint64_t {
16786       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
16787           (Op0.getOperand(0) == Op.getOperand(0)))
16788         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
16789           return CNode->getZExtValue();
16790       return -1;
16791     };
16792 
16793     int Offset = checkElem(Op0);
16794     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
16795       if (Offset + i != checkElem(N->getOperand(i))) {
16796         Offset = -1;
16797         break;
16798       }
16799     }
16800 
16801     if ((Offset == 0) &&
16802         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
16803       return Op0.getOperand(0);
16804     if ((Offset != -1) &&
16805         ((Offset % N->getValueType(0).getVectorNumElements()) ==
16806          0)) // IDX must be multiple of output size.
16807       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
16808                          Op0.getOperand(0), Op0.getOperand(1));
16809   }
16810 
16811   if (SDValue V = convertBuildVecZextToZext(N))
16812     return V;
16813 
16814   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
16815     return V;
16816 
16817   if (SDValue V = reduceBuildVecToShuffle(N))
16818     return V;
16819 
16820   return SDValue();
16821 }
16822 
16823 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
16824   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16825   EVT OpVT = N->getOperand(0).getValueType();
16826 
16827   // If the operands are legal vectors, leave them alone.
16828   if (TLI.isTypeLegal(OpVT))
16829     return SDValue();
16830 
16831   SDLoc DL(N);
16832   EVT VT = N->getValueType(0);
16833   SmallVector<SDValue, 8> Ops;
16834 
16835   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
16836   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16837 
16838   // Keep track of what we encounter.
16839   bool AnyInteger = false;
16840   bool AnyFP = false;
16841   for (const SDValue &Op : N->ops()) {
16842     if (ISD::BITCAST == Op.getOpcode() &&
16843         !Op.getOperand(0).getValueType().isVector())
16844       Ops.push_back(Op.getOperand(0));
16845     else if (ISD::UNDEF == Op.getOpcode())
16846       Ops.push_back(ScalarUndef);
16847     else
16848       return SDValue();
16849 
16850     // Note whether we encounter an integer or floating point scalar.
16851     // If it's neither, bail out, it could be something weird like x86mmx.
16852     EVT LastOpVT = Ops.back().getValueType();
16853     if (LastOpVT.isFloatingPoint())
16854       AnyFP = true;
16855     else if (LastOpVT.isInteger())
16856       AnyInteger = true;
16857     else
16858       return SDValue();
16859   }
16860 
16861   // If any of the operands is a floating point scalar bitcast to a vector,
16862   // use floating point types throughout, and bitcast everything.
16863   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
16864   if (AnyFP) {
16865     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
16866     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
16867     if (AnyInteger) {
16868       for (SDValue &Op : Ops) {
16869         if (Op.getValueType() == SVT)
16870           continue;
16871         if (Op.isUndef())
16872           Op = ScalarUndef;
16873         else
16874           Op = DAG.getBitcast(SVT, Op);
16875       }
16876     }
16877   }
16878 
16879   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
16880                                VT.getSizeInBits() / SVT.getSizeInBits());
16881   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
16882 }
16883 
16884 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
16885 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
16886 // most two distinct vectors the same size as the result, attempt to turn this
16887 // into a legal shuffle.
16888 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
16889   EVT VT = N->getValueType(0);
16890   EVT OpVT = N->getOperand(0).getValueType();
16891   int NumElts = VT.getVectorNumElements();
16892   int NumOpElts = OpVT.getVectorNumElements();
16893 
16894   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
16895   SmallVector<int, 8> Mask;
16896 
16897   for (SDValue Op : N->ops()) {
16898     Op = peekThroughBitcasts(Op);
16899 
16900     // UNDEF nodes convert to UNDEF shuffle mask values.
16901     if (Op.isUndef()) {
16902       Mask.append((unsigned)NumOpElts, -1);
16903       continue;
16904     }
16905 
16906     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16907       return SDValue();
16908 
16909     // What vector are we extracting the subvector from and at what index?
16910     SDValue ExtVec = Op.getOperand(0);
16911 
16912     // We want the EVT of the original extraction to correctly scale the
16913     // extraction index.
16914     EVT ExtVT = ExtVec.getValueType();
16915     ExtVec = peekThroughBitcasts(ExtVec);
16916 
16917     // UNDEF nodes convert to UNDEF shuffle mask values.
16918     if (ExtVec.isUndef()) {
16919       Mask.append((unsigned)NumOpElts, -1);
16920       continue;
16921     }
16922 
16923     if (!isa<ConstantSDNode>(Op.getOperand(1)))
16924       return SDValue();
16925     int ExtIdx = Op.getConstantOperandVal(1);
16926 
16927     // Ensure that we are extracting a subvector from a vector the same
16928     // size as the result.
16929     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
16930       return SDValue();
16931 
16932     // Scale the subvector index to account for any bitcast.
16933     int NumExtElts = ExtVT.getVectorNumElements();
16934     if (0 == (NumExtElts % NumElts))
16935       ExtIdx /= (NumExtElts / NumElts);
16936     else if (0 == (NumElts % NumExtElts))
16937       ExtIdx *= (NumElts / NumExtElts);
16938     else
16939       return SDValue();
16940 
16941     // At most we can reference 2 inputs in the final shuffle.
16942     if (SV0.isUndef() || SV0 == ExtVec) {
16943       SV0 = ExtVec;
16944       for (int i = 0; i != NumOpElts; ++i)
16945         Mask.push_back(i + ExtIdx);
16946     } else if (SV1.isUndef() || SV1 == ExtVec) {
16947       SV1 = ExtVec;
16948       for (int i = 0; i != NumOpElts; ++i)
16949         Mask.push_back(i + ExtIdx + NumElts);
16950     } else {
16951       return SDValue();
16952     }
16953   }
16954 
16955   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
16956     return SDValue();
16957 
16958   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
16959                               DAG.getBitcast(VT, SV1), Mask);
16960 }
16961 
16962 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
16963   // If we only have one input vector, we don't need to do any concatenation.
16964   if (N->getNumOperands() == 1)
16965     return N->getOperand(0);
16966 
16967   // Check if all of the operands are undefs.
16968   EVT VT = N->getValueType(0);
16969   if (ISD::allOperandsUndef(N))
16970     return DAG.getUNDEF(VT);
16971 
16972   // Optimize concat_vectors where all but the first of the vectors are undef.
16973   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
16974         return Op.isUndef();
16975       })) {
16976     SDValue In = N->getOperand(0);
16977     assert(In.getValueType().isVector() && "Must concat vectors");
16978 
16979     SDValue Scalar = peekThroughOneUseBitcasts(In);
16980 
16981     // concat_vectors(scalar_to_vector(scalar), undef) ->
16982     //     scalar_to_vector(scalar)
16983     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16984          Scalar.hasOneUse()) {
16985       EVT SVT = Scalar.getValueType().getVectorElementType();
16986       if (SVT == Scalar.getOperand(0).getValueType())
16987         Scalar = Scalar.getOperand(0);
16988     }
16989 
16990     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
16991     if (!Scalar.getValueType().isVector()) {
16992       // If the bitcast type isn't legal, it might be a trunc of a legal type;
16993       // look through the trunc so we can still do the transform:
16994       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
16995       if (Scalar->getOpcode() == ISD::TRUNCATE &&
16996           !TLI.isTypeLegal(Scalar.getValueType()) &&
16997           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
16998         Scalar = Scalar->getOperand(0);
16999 
17000       EVT SclTy = Scalar.getValueType();
17001 
17002       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17003         return SDValue();
17004 
17005       // Bail out if the vector size is not a multiple of the scalar size.
17006       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17007         return SDValue();
17008 
17009       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17010       if (VNTNumElms < 2)
17011         return SDValue();
17012 
17013       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17014       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17015         return SDValue();
17016 
17017       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17018       return DAG.getBitcast(VT, Res);
17019     }
17020   }
17021 
17022   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17023   // We have already tested above for an UNDEF only concatenation.
17024   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17025   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17026   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17027     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17028   };
17029   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17030     SmallVector<SDValue, 8> Opnds;
17031     EVT SVT = VT.getScalarType();
17032 
17033     EVT MinVT = SVT;
17034     if (!SVT.isFloatingPoint()) {
17035       // If BUILD_VECTOR are from built from integer, they may have different
17036       // operand types. Get the smallest type and truncate all operands to it.
17037       bool FoundMinVT = false;
17038       for (const SDValue &Op : N->ops())
17039         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17040           EVT OpSVT = Op.getOperand(0).getValueType();
17041           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17042           FoundMinVT = true;
17043         }
17044       assert(FoundMinVT && "Concat vector type mismatch");
17045     }
17046 
17047     for (const SDValue &Op : N->ops()) {
17048       EVT OpVT = Op.getValueType();
17049       unsigned NumElts = OpVT.getVectorNumElements();
17050 
17051       if (ISD::UNDEF == Op.getOpcode())
17052         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17053 
17054       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17055         if (SVT.isFloatingPoint()) {
17056           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17057           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17058         } else {
17059           for (unsigned i = 0; i != NumElts; ++i)
17060             Opnds.push_back(
17061                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17062         }
17063       }
17064     }
17065 
17066     assert(VT.getVectorNumElements() == Opnds.size() &&
17067            "Concat vector type mismatch");
17068     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17069   }
17070 
17071   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17072   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17073     return V;
17074 
17075   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17076   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17077     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17078       return V;
17079 
17080   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17081   // nodes often generate nop CONCAT_VECTOR nodes.
17082   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17083   // place the incoming vectors at the exact same location.
17084   SDValue SingleSource = SDValue();
17085   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17086 
17087   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17088     SDValue Op = N->getOperand(i);
17089 
17090     if (Op.isUndef())
17091       continue;
17092 
17093     // Check if this is the identity extract:
17094     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17095       return SDValue();
17096 
17097     // Find the single incoming vector for the extract_subvector.
17098     if (SingleSource.getNode()) {
17099       if (Op.getOperand(0) != SingleSource)
17100         return SDValue();
17101     } else {
17102       SingleSource = Op.getOperand(0);
17103 
17104       // Check the source type is the same as the type of the result.
17105       // If not, this concat may extend the vector, so we can not
17106       // optimize it away.
17107       if (SingleSource.getValueType() != N->getValueType(0))
17108         return SDValue();
17109     }
17110 
17111     unsigned IdentityIndex = i * PartNumElem;
17112     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17113     // The extract index must be constant.
17114     if (!CS)
17115       return SDValue();
17116 
17117     // Check that we are reading from the identity index.
17118     if (CS->getZExtValue() != IdentityIndex)
17119       return SDValue();
17120   }
17121 
17122   if (SingleSource.getNode())
17123     return SingleSource;
17124 
17125   return SDValue();
17126 }
17127 
17128 /// If we are extracting a subvector produced by a wide binary operator try
17129 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17130 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17131   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17132   // some of these bailouts with other transforms.
17133 
17134   // The extract index must be a constant, so we can map it to a concat operand.
17135   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17136   if (!ExtractIndexC)
17137     return SDValue();
17138 
17139   // We are looking for an optionally bitcasted wide vector binary operator
17140   // feeding an extract subvector.
17141   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17142   if (!ISD::isBinaryOp(BinOp.getNode()))
17143     return SDValue();
17144 
17145   // The binop must be a vector type, so we can extract some fraction of it.
17146   EVT WideBVT = BinOp.getValueType();
17147   if (!WideBVT.isVector())
17148     return SDValue();
17149 
17150   EVT VT = Extract->getValueType(0);
17151   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17152   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17153          "Extract index is not a multiple of the vector length.");
17154 
17155   // Bail out if this is not a proper multiple width extraction.
17156   unsigned WideWidth = WideBVT.getSizeInBits();
17157   unsigned NarrowWidth = VT.getSizeInBits();
17158   if (WideWidth % NarrowWidth != 0)
17159     return SDValue();
17160 
17161   // Bail out if we are extracting a fraction of a single operation. This can
17162   // occur because we potentially looked through a bitcast of the binop.
17163   unsigned NarrowingRatio = WideWidth / NarrowWidth;
17164   unsigned WideNumElts = WideBVT.getVectorNumElements();
17165   if (WideNumElts % NarrowingRatio != 0)
17166     return SDValue();
17167 
17168   // Bail out if the target does not support a narrower version of the binop.
17169   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17170                                    WideNumElts / NarrowingRatio);
17171   unsigned BOpcode = BinOp.getOpcode();
17172   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17173   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17174     return SDValue();
17175 
17176   // If extraction is cheap, we don't need to look at the binop operands
17177   // for concat ops. The narrow binop alone makes this transform profitable.
17178   // We can't just reuse the original extract index operand because we may have
17179   // bitcasted.
17180   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17181   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17182   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17183   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17184       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17185     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17186     SDLoc DL(Extract);
17187     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17188     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17189                             BinOp.getOperand(0), NewExtIndex);
17190     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17191                             BinOp.getOperand(1), NewExtIndex);
17192     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17193                                       BinOp.getNode()->getFlags());
17194     return DAG.getBitcast(VT, NarrowBinOp);
17195   }
17196 
17197   // Only handle the case where we are doubling and then halving. A larger ratio
17198   // may require more than two narrow binops to replace the wide binop.
17199   if (NarrowingRatio != 2)
17200     return SDValue();
17201 
17202   // TODO: The motivating case for this transform is an x86 AVX1 target. That
17203   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17204   // flavors, but no other 256-bit integer support. This could be extended to
17205   // handle any binop, but that may require fixing/adding other folds to avoid
17206   // codegen regressions.
17207   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17208     return SDValue();
17209 
17210   // We need at least one concatenation operation of a binop operand to make
17211   // this transform worthwhile. The concat must double the input vector sizes.
17212   // TODO: Should we also handle INSERT_SUBVECTOR patterns?
17213   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
17214   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
17215   bool ConcatL =
17216       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17217   bool ConcatR =
17218       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17219   if (!ConcatL && !ConcatR)
17220     return SDValue();
17221 
17222   // If one of the binop operands was not the result of a concat, we must
17223   // extract a half-sized operand for our new narrow binop.
17224   SDLoc DL(Extract);
17225 
17226   // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17227   // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
17228   // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
17229   SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17230                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17231                                     BinOp.getOperand(0),
17232                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17233 
17234   SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17235                       : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17236                                     BinOp.getOperand(1),
17237                                     DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
17238 
17239   SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17240   return DAG.getBitcast(VT, NarrowBinOp);
17241 }
17242 
17243 /// If we are extracting a subvector from a wide vector load, convert to a
17244 /// narrow load to eliminate the extraction:
17245 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17246 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17247   // TODO: Add support for big-endian. The offset calculation must be adjusted.
17248   if (DAG.getDataLayout().isBigEndian())
17249     return SDValue();
17250 
17251   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17252   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17253   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17254     return SDValue();
17255 
17256   // Allow targets to opt-out.
17257   EVT VT = Extract->getValueType(0);
17258   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17259   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17260     return SDValue();
17261 
17262   // The narrow load will be offset from the base address of the old load if
17263   // we are extracting from something besides index 0 (little-endian).
17264   SDLoc DL(Extract);
17265   SDValue BaseAddr = Ld->getOperand(1);
17266   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
17267 
17268   // TODO: Use "BaseIndexOffset" to make this more effective.
17269   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17270   MachineFunction &MF = DAG.getMachineFunction();
17271   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17272                                                    VT.getStoreSize());
17273   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17274   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17275   return NewLd;
17276 }
17277 
17278 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
17279   EVT NVT = N->getValueType(0);
17280   SDValue V = N->getOperand(0);
17281 
17282   // Extract from UNDEF is UNDEF.
17283   if (V.isUndef())
17284     return DAG.getUNDEF(NVT);
17285 
17286   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17287     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17288       return NarrowLoad;
17289 
17290   // Combine an extract of an extract into a single extract_subvector.
17291   // ext (ext X, C), 0 --> ext X, C
17292   if (isNullConstant(N->getOperand(1)) &&
17293       V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse() &&
17294       isa<ConstantSDNode>(V.getOperand(1))) {
17295     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
17296                                     V.getConstantOperandVal(1)) &&
17297         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
17298       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
17299                          V.getOperand(1));
17300     }
17301   }
17302 
17303   // Combine:
17304   //    (extract_subvec (concat V1, V2, ...), i)
17305   // Into:
17306   //    Vi if possible
17307   // Only operand 0 is checked as 'concat' assumes all inputs of the same
17308   // type.
17309   if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17310       isa<ConstantSDNode>(N->getOperand(1)) &&
17311       V.getOperand(0).getValueType() == NVT) {
17312     unsigned Idx = N->getConstantOperandVal(1);
17313     unsigned NumElems = NVT.getVectorNumElements();
17314     assert((Idx % NumElems) == 0 &&
17315            "IDX in concat is not a multiple of the result vector length.");
17316     return V->getOperand(Idx / NumElems);
17317   }
17318 
17319   V = peekThroughBitcasts(V);
17320 
17321   // If the input is a build vector. Try to make a smaller build vector.
17322   if (V.getOpcode() == ISD::BUILD_VECTOR) {
17323     if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17324       EVT InVT = V.getValueType();
17325       unsigned ExtractSize = NVT.getSizeInBits();
17326       unsigned EltSize = InVT.getScalarSizeInBits();
17327       // Only do this if we won't split any elements.
17328       if (ExtractSize % EltSize == 0) {
17329         unsigned NumElems = ExtractSize / EltSize;
17330         EVT EltVT = InVT.getVectorElementType();
17331         EVT ExtractVT = NumElems == 1 ? EltVT :
17332           EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
17333         if ((Level < AfterLegalizeDAG ||
17334              (NumElems == 1 ||
17335               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17336             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17337           unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
17338                             EltSize;
17339           if (NumElems == 1) {
17340             SDValue Src = V->getOperand(IdxVal);
17341             if (EltVT != Src.getValueType())
17342               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
17343 
17344             return DAG.getBitcast(NVT, Src);
17345           }
17346 
17347           // Extract the pieces from the original build_vector.
17348           SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
17349                                             makeArrayRef(V->op_begin() + IdxVal,
17350                                                          NumElems));
17351           return DAG.getBitcast(NVT, BuildVec);
17352         }
17353       }
17354     }
17355   }
17356 
17357   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17358     // Handle only simple case where vector being inserted and vector
17359     // being extracted are of same size.
17360     EVT SmallVT = V.getOperand(1).getValueType();
17361     if (!NVT.bitsEq(SmallVT))
17362       return SDValue();
17363 
17364     // Only handle cases where both indexes are constants.
17365     auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17366     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17367 
17368     if (InsIdx && ExtIdx) {
17369       // Combine:
17370       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17371       // Into:
17372       //    indices are equal or bit offsets are equal => V1
17373       //    otherwise => (extract_subvec V1, ExtIdx)
17374       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17375           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17376         return DAG.getBitcast(NVT, V.getOperand(1));
17377       return DAG.getNode(
17378           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17379           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17380                          N->getOperand(1));
17381     }
17382   }
17383 
17384   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17385     return NarrowBOp;
17386 
17387   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17388     return SDValue(N, 0);
17389 
17390   return SDValue();
17391 }
17392 
17393 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
17394 // or turn a shuffle of a single concat into simpler shuffle then concat.
17395 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17396   EVT VT = N->getValueType(0);
17397   unsigned NumElts = VT.getVectorNumElements();
17398 
17399   SDValue N0 = N->getOperand(0);
17400   SDValue N1 = N->getOperand(1);
17401   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17402   ArrayRef<int> Mask = SVN->getMask();
17403 
17404   SmallVector<SDValue, 4> Ops;
17405   EVT ConcatVT = N0.getOperand(0).getValueType();
17406   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17407   unsigned NumConcats = NumElts / NumElemsPerConcat;
17408 
17409   auto IsUndefMaskElt = [](int i) { return i == -1; };
17410 
17411   // Special case: shuffle(concat(A,B)) can be more efficiently represented
17412   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17413   // half vector elements.
17414   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17415       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
17416                    IsUndefMaskElt)) {
17417     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
17418                               N0.getOperand(1),
17419                               Mask.slice(0, NumElemsPerConcat));
17420     N1 = DAG.getUNDEF(ConcatVT);
17421     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17422   }
17423 
17424   // Look at every vector that's inserted. We're looking for exact
17425   // subvector-sized copies from a concatenated vector
17426   for (unsigned I = 0; I != NumConcats; ++I) {
17427     unsigned Begin = I * NumElemsPerConcat;
17428     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
17429 
17430     // Make sure we're dealing with a copy.
17431     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
17432       Ops.push_back(DAG.getUNDEF(ConcatVT));
17433       continue;
17434     }
17435 
17436     int OpIdx = -1;
17437     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
17438       if (IsUndefMaskElt(SubMask[i]))
17439         continue;
17440       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
17441         return SDValue();
17442       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
17443       if (0 <= OpIdx && EltOpIdx != OpIdx)
17444         return SDValue();
17445       OpIdx = EltOpIdx;
17446     }
17447     assert(0 <= OpIdx && "Unknown concat_vectors op");
17448 
17449     if (OpIdx < (int)N0.getNumOperands())
17450       Ops.push_back(N0.getOperand(OpIdx));
17451     else
17452       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
17453   }
17454 
17455   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17456 }
17457 
17458 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17459 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17460 //
17461 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17462 // a simplification in some sense, but it isn't appropriate in general: some
17463 // BUILD_VECTORs are substantially cheaper than others. The general case
17464 // of a BUILD_VECTOR requires inserting each element individually (or
17465 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17466 // all constants is a single constant pool load.  A BUILD_VECTOR where each
17467 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
17468 // are undef lowers to a small number of element insertions.
17469 //
17470 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17471 // We don't fold shuffles where one side is a non-zero constant, and we don't
17472 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17473 // non-constant operands. This seems to work out reasonably well in practice.
17474 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17475                                        SelectionDAG &DAG,
17476                                        const TargetLowering &TLI) {
17477   EVT VT = SVN->getValueType(0);
17478   unsigned NumElts = VT.getVectorNumElements();
17479   SDValue N0 = SVN->getOperand(0);
17480   SDValue N1 = SVN->getOperand(1);
17481 
17482   if (!N0->hasOneUse())
17483     return SDValue();
17484 
17485   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
17486   // discussed above.
17487   if (!N1.isUndef()) {
17488     if (!N1->hasOneUse())
17489       return SDValue();
17490 
17491     bool N0AnyConst = isAnyConstantBuildVector(N0);
17492     bool N1AnyConst = isAnyConstantBuildVector(N1);
17493     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17494       return SDValue();
17495     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17496       return SDValue();
17497   }
17498 
17499   // If both inputs are splats of the same value then we can safely merge this
17500   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17501   bool IsSplat = false;
17502   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17503   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17504   if (BV0 && BV1)
17505     if (SDValue Splat0 = BV0->getSplatValue())
17506       IsSplat = (Splat0 == BV1->getSplatValue());
17507 
17508   SmallVector<SDValue, 8> Ops;
17509   SmallSet<SDValue, 16> DuplicateOps;
17510   for (int M : SVN->getMask()) {
17511     SDValue Op = DAG.getUNDEF(VT.getScalarType());
17512     if (M >= 0) {
17513       int Idx = M < (int)NumElts ? M : M - NumElts;
17514       SDValue &S = (M < (int)NumElts ? N0 : N1);
17515       if (S.getOpcode() == ISD::BUILD_VECTOR) {
17516         Op = S.getOperand(Idx);
17517       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17518         assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
17519         Op = S.getOperand(0);
17520       } else {
17521         // Operand can't be combined - bail out.
17522         return SDValue();
17523       }
17524     }
17525 
17526     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17527     // generating a splat; semantically, this is fine, but it's likely to
17528     // generate low-quality code if the target can't reconstruct an appropriate
17529     // shuffle.
17530     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17531       if (!IsSplat && !DuplicateOps.insert(Op).second)
17532         return SDValue();
17533 
17534     Ops.push_back(Op);
17535   }
17536 
17537   // BUILD_VECTOR requires all inputs to be of the same type, find the
17538   // maximum type and extend them all.
17539   EVT SVT = VT.getScalarType();
17540   if (SVT.isInteger())
17541     for (SDValue &Op : Ops)
17542       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17543   if (SVT != VT.getScalarType())
17544     for (SDValue &Op : Ops)
17545       Op = TLI.isZExtFree(Op.getValueType(), SVT)
17546                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17547                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17548   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17549 }
17550 
17551 // Match shuffles that can be converted to any_vector_extend_in_reg.
17552 // This is often generated during legalization.
17553 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
17554 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17555 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17556                                             SelectionDAG &DAG,
17557                                             const TargetLowering &TLI,
17558                                             bool LegalOperations) {
17559   EVT VT = SVN->getValueType(0);
17560   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17561 
17562   // TODO Add support for big-endian when we have a test case.
17563   if (!VT.isInteger() || IsBigEndian)
17564     return SDValue();
17565 
17566   unsigned NumElts = VT.getVectorNumElements();
17567   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17568   ArrayRef<int> Mask = SVN->getMask();
17569   SDValue N0 = SVN->getOperand(0);
17570 
17571   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17572   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17573     for (unsigned i = 0; i != NumElts; ++i) {
17574       if (Mask[i] < 0)
17575         continue;
17576       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17577         continue;
17578       return false;
17579     }
17580     return true;
17581   };
17582 
17583   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17584   // power-of-2 extensions as they are the most likely.
17585   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17586     // Check for non power of 2 vector sizes
17587     if (NumElts % Scale != 0)
17588       continue;
17589     if (!isAnyExtend(Scale))
17590       continue;
17591 
17592     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17593     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17594     // Never create an illegal type. Only create unsupported operations if we
17595     // are pre-legalization.
17596     if (TLI.isTypeLegal(OutVT))
17597       if (!LegalOperations ||
17598           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17599         return DAG.getBitcast(VT,
17600                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17601                                           SDLoc(SVN), OutVT, N0));
17602   }
17603 
17604   return SDValue();
17605 }
17606 
17607 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17608 // each source element of a large type into the lowest elements of a smaller
17609 // destination type. This is often generated during legalization.
17610 // If the source node itself was a '*_extend_vector_inreg' node then we should
17611 // then be able to remove it.
17612 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17613                                         SelectionDAG &DAG) {
17614   EVT VT = SVN->getValueType(0);
17615   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17616 
17617   // TODO Add support for big-endian when we have a test case.
17618   if (!VT.isInteger() || IsBigEndian)
17619     return SDValue();
17620 
17621   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17622 
17623   unsigned Opcode = N0.getOpcode();
17624   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17625       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17626       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17627     return SDValue();
17628 
17629   SDValue N00 = N0.getOperand(0);
17630   ArrayRef<int> Mask = SVN->getMask();
17631   unsigned NumElts = VT.getVectorNumElements();
17632   unsigned EltSizeInBits = VT.getScalarSizeInBits();
17633   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17634   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17635 
17636   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17637     return SDValue();
17638   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17639 
17640   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
17641   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17642   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
17643   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17644     for (unsigned i = 0; i != NumElts; ++i) {
17645       if (Mask[i] < 0)
17646         continue;
17647       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17648         continue;
17649       return false;
17650     }
17651     return true;
17652   };
17653 
17654   // At the moment we just handle the case where we've truncated back to the
17655   // same size as before the extension.
17656   // TODO: handle more extension/truncation cases as cases arise.
17657   if (EltSizeInBits != ExtSrcSizeInBits)
17658     return SDValue();
17659 
17660   // We can remove *extend_vector_inreg only if the truncation happens at
17661   // the same scale as the extension.
17662   if (isTruncate(ExtScale))
17663     return DAG.getBitcast(VT, N00);
17664 
17665   return SDValue();
17666 }
17667 
17668 // Combine shuffles of splat-shuffles of the form:
17669 // shuffle (shuffle V, undef, splat-mask), undef, M
17670 // If splat-mask contains undef elements, we need to be careful about
17671 // introducing undef's in the folded mask which are not the result of composing
17672 // the masks of the shuffles.
17673 static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
17674                                      ShuffleVectorSDNode *Splat,
17675                                      SelectionDAG &DAG) {
17676   ArrayRef<int> SplatMask = Splat->getMask();
17677   assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
17678 
17679   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
17680   // every undef mask element in the splat-shuffle has a corresponding undef
17681   // element in the user-shuffle's mask or if the composition of mask elements
17682   // would result in undef.
17683   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
17684   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
17685   //   In this case it is not legal to simplify to the splat-shuffle because we
17686   //   may be exposing the users of the shuffle an undef element at index 1
17687   //   which was not there before the combine.
17688   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
17689   //   In this case the composition of masks yields SplatMask, so it's ok to
17690   //   simplify to the splat-shuffle.
17691   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
17692   //   In this case the composed mask includes all undef elements of SplatMask
17693   //   and in addition sets element zero to undef. It is safe to simplify to
17694   //   the splat-shuffle.
17695   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
17696                                        ArrayRef<int> SplatMask) {
17697     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
17698       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
17699           SplatMask[UserMask[i]] != -1)
17700         return false;
17701     return true;
17702   };
17703   if (CanSimplifyToExistingSplat(UserMask, SplatMask))
17704     return SDValue(Splat, 0);
17705 
17706   // Create a new shuffle with a mask that is composed of the two shuffles'
17707   // masks.
17708   SmallVector<int, 32> NewMask;
17709   for (int Idx : UserMask)
17710     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
17711 
17712   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
17713                               Splat->getOperand(0), Splat->getOperand(1),
17714                               NewMask);
17715 }
17716 
17717 /// If the shuffle mask is taking exactly one element from the first vector
17718 /// operand and passing through all other elements from the second vector
17719 /// operand, return the index of the mask element that is choosing an element
17720 /// from the first operand. Otherwise, return -1.
17721 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
17722   int MaskSize = Mask.size();
17723   int EltFromOp0 = -1;
17724   // TODO: This does not match if there are undef elements in the shuffle mask.
17725   // Should we ignore undefs in the shuffle mask instead? The trade-off is
17726   // removing an instruction (a shuffle), but losing the knowledge that some
17727   // vector lanes are not needed.
17728   for (int i = 0; i != MaskSize; ++i) {
17729     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
17730       // We're looking for a shuffle of exactly one element from operand 0.
17731       if (EltFromOp0 != -1)
17732         return -1;
17733       EltFromOp0 = i;
17734     } else if (Mask[i] != i + MaskSize) {
17735       // Nothing from operand 1 can change lanes.
17736       return -1;
17737     }
17738   }
17739   return EltFromOp0;
17740 }
17741 
17742 /// If a shuffle inserts exactly one element from a source vector operand into
17743 /// another vector operand and we can access the specified element as a scalar,
17744 /// then we can eliminate the shuffle.
17745 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
17746                                       SelectionDAG &DAG) {
17747   // First, check if we are taking one element of a vector and shuffling that
17748   // element into another vector.
17749   ArrayRef<int> Mask = Shuf->getMask();
17750   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
17751   SDValue Op0 = Shuf->getOperand(0);
17752   SDValue Op1 = Shuf->getOperand(1);
17753   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
17754   if (ShufOp0Index == -1) {
17755     // Commute mask and check again.
17756     ShuffleVectorSDNode::commuteMask(CommutedMask);
17757     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
17758     if (ShufOp0Index == -1)
17759       return SDValue();
17760     // Commute operands to match the commuted shuffle mask.
17761     std::swap(Op0, Op1);
17762     Mask = CommutedMask;
17763   }
17764 
17765   // The shuffle inserts exactly one element from operand 0 into operand 1.
17766   // Now see if we can access that element as a scalar via a real insert element
17767   // instruction.
17768   // TODO: We can try harder to locate the element as a scalar. Examples: it
17769   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
17770   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
17771          "Shuffle mask value must be from operand 0");
17772   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
17773     return SDValue();
17774 
17775   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
17776   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
17777     return SDValue();
17778 
17779   // There's an existing insertelement with constant insertion index, so we
17780   // don't need to check the legality/profitability of a replacement operation
17781   // that differs at most in the constant value. The target should be able to
17782   // lower any of those in a similar way. If not, legalization will expand this
17783   // to a scalar-to-vector plus shuffle.
17784   //
17785   // Note that the shuffle may move the scalar from the position that the insert
17786   // element used. Therefore, our new insert element occurs at the shuffle's
17787   // mask index value, not the insert's index value.
17788   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
17789   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
17790                                         Op0.getOperand(2).getValueType());
17791   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
17792                      Op1, Op0.getOperand(1), NewInsIndex);
17793 }
17794 
17795 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
17796   EVT VT = N->getValueType(0);
17797   unsigned NumElts = VT.getVectorNumElements();
17798 
17799   SDValue N0 = N->getOperand(0);
17800   SDValue N1 = N->getOperand(1);
17801 
17802   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
17803 
17804   // Canonicalize shuffle undef, undef -> undef
17805   if (N0.isUndef() && N1.isUndef())
17806     return DAG.getUNDEF(VT);
17807 
17808   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17809 
17810   // Canonicalize shuffle v, v -> v, undef
17811   if (N0 == N1) {
17812     SmallVector<int, 8> NewMask;
17813     for (unsigned i = 0; i != NumElts; ++i) {
17814       int Idx = SVN->getMaskElt(i);
17815       if (Idx >= (int)NumElts) Idx -= NumElts;
17816       NewMask.push_back(Idx);
17817     }
17818     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
17819   }
17820 
17821   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
17822   if (N0.isUndef())
17823     return DAG.getCommutedVectorShuffle(*SVN);
17824 
17825   // Remove references to rhs if it is undef
17826   if (N1.isUndef()) {
17827     bool Changed = false;
17828     SmallVector<int, 8> NewMask;
17829     for (unsigned i = 0; i != NumElts; ++i) {
17830       int Idx = SVN->getMaskElt(i);
17831       if (Idx >= (int)NumElts) {
17832         Idx = -1;
17833         Changed = true;
17834       }
17835       NewMask.push_back(Idx);
17836     }
17837     if (Changed)
17838       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
17839   }
17840 
17841   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
17842     return InsElt;
17843 
17844   // A shuffle of a single vector that is a splat can always be folded.
17845   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
17846     if (N1->isUndef() && N0Shuf->isSplat())
17847       return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
17848 
17849   // If it is a splat, check if the argument vector is another splat or a
17850   // build_vector.
17851   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
17852     SDNode *V = N0.getNode();
17853 
17854     // If this is a bit convert that changes the element type of the vector but
17855     // not the number of vector elements, look through it.  Be careful not to
17856     // look though conversions that change things like v4f32 to v2f64.
17857     if (V->getOpcode() == ISD::BITCAST) {
17858       SDValue ConvInput = V->getOperand(0);
17859       if (ConvInput.getValueType().isVector() &&
17860           ConvInput.getValueType().getVectorNumElements() == NumElts)
17861         V = ConvInput.getNode();
17862     }
17863 
17864     if (V->getOpcode() == ISD::BUILD_VECTOR) {
17865       assert(V->getNumOperands() == NumElts &&
17866              "BUILD_VECTOR has wrong number of operands");
17867       SDValue Base;
17868       bool AllSame = true;
17869       for (unsigned i = 0; i != NumElts; ++i) {
17870         if (!V->getOperand(i).isUndef()) {
17871           Base = V->getOperand(i);
17872           break;
17873         }
17874       }
17875       // Splat of <u, u, u, u>, return <u, u, u, u>
17876       if (!Base.getNode())
17877         return N0;
17878       for (unsigned i = 0; i != NumElts; ++i) {
17879         if (V->getOperand(i) != Base) {
17880           AllSame = false;
17881           break;
17882         }
17883       }
17884       // Splat of <x, x, x, x>, return <x, x, x, x>
17885       if (AllSame)
17886         return N0;
17887 
17888       // Canonicalize any other splat as a build_vector.
17889       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
17890       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
17891       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
17892 
17893       // We may have jumped through bitcasts, so the type of the
17894       // BUILD_VECTOR may not match the type of the shuffle.
17895       if (V->getValueType(0) != VT)
17896         NewBV = DAG.getBitcast(VT, NewBV);
17897       return NewBV;
17898     }
17899   }
17900 
17901   // Simplify source operands based on shuffle mask.
17902   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17903     return SDValue(N, 0);
17904 
17905   // Match shuffles that can be converted to any_vector_extend_in_reg.
17906   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
17907     return V;
17908 
17909   // Combine "truncate_vector_in_reg" style shuffles.
17910   if (SDValue V = combineTruncationShuffle(SVN, DAG))
17911     return V;
17912 
17913   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
17914       Level < AfterLegalizeVectorOps &&
17915       (N1.isUndef() ||
17916       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
17917        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
17918     if (SDValue V = partitionShuffleOfConcats(N, DAG))
17919       return V;
17920   }
17921 
17922   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17923   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17924   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
17925     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
17926       return Res;
17927 
17928   // If this shuffle only has a single input that is a bitcasted shuffle,
17929   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
17930   // back to their original types.
17931   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
17932       N1.isUndef() && Level < AfterLegalizeVectorOps &&
17933       TLI.isTypeLegal(VT)) {
17934     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
17935       if (Scale == 1)
17936         return SmallVector<int, 8>(Mask.begin(), Mask.end());
17937 
17938       SmallVector<int, 8> NewMask;
17939       for (int M : Mask)
17940         for (int s = 0; s != Scale; ++s)
17941           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
17942       return NewMask;
17943     };
17944 
17945     SDValue BC0 = peekThroughOneUseBitcasts(N0);
17946     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
17947       EVT SVT = VT.getScalarType();
17948       EVT InnerVT = BC0->getValueType(0);
17949       EVT InnerSVT = InnerVT.getScalarType();
17950 
17951       // Determine which shuffle works with the smaller scalar type.
17952       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
17953       EVT ScaleSVT = ScaleVT.getScalarType();
17954 
17955       if (TLI.isTypeLegal(ScaleVT) &&
17956           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
17957           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
17958         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17959         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
17960 
17961         // Scale the shuffle masks to the smaller scalar type.
17962         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
17963         SmallVector<int, 8> InnerMask =
17964             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
17965         SmallVector<int, 8> OuterMask =
17966             ScaleShuffleMask(SVN->getMask(), OuterScale);
17967 
17968         // Merge the shuffle masks.
17969         SmallVector<int, 8> NewMask;
17970         for (int M : OuterMask)
17971           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
17972 
17973         // Test for shuffle mask legality over both commutations.
17974         SDValue SV0 = BC0->getOperand(0);
17975         SDValue SV1 = BC0->getOperand(1);
17976         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17977         if (!LegalMask) {
17978           std::swap(SV0, SV1);
17979           ShuffleVectorSDNode::commuteMask(NewMask);
17980           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
17981         }
17982 
17983         if (LegalMask) {
17984           SV0 = DAG.getBitcast(ScaleVT, SV0);
17985           SV1 = DAG.getBitcast(ScaleVT, SV1);
17986           return DAG.getBitcast(
17987               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
17988         }
17989       }
17990     }
17991   }
17992 
17993   // Canonicalize shuffles according to rules:
17994   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
17995   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
17996   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
17997   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
17998       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
17999       TLI.isTypeLegal(VT)) {
18000     // The incoming shuffle must be of the same type as the result of the
18001     // current shuffle.
18002     assert(N1->getOperand(0).getValueType() == VT &&
18003            "Shuffle types don't match");
18004 
18005     SDValue SV0 = N1->getOperand(0);
18006     SDValue SV1 = N1->getOperand(1);
18007     bool HasSameOp0 = N0 == SV0;
18008     bool IsSV1Undef = SV1.isUndef();
18009     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18010       // Commute the operands of this shuffle so that next rule
18011       // will trigger.
18012       return DAG.getCommutedVectorShuffle(*SVN);
18013   }
18014 
18015   // Try to fold according to rules:
18016   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18017   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18018   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18019   // Don't try to fold shuffles with illegal type.
18020   // Only fold if this shuffle is the only user of the other shuffle.
18021   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18022       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18023     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18024 
18025     // Don't try to fold splats; they're likely to simplify somehow, or they
18026     // might be free.
18027     if (OtherSV->isSplat())
18028       return SDValue();
18029 
18030     // The incoming shuffle must be of the same type as the result of the
18031     // current shuffle.
18032     assert(OtherSV->getOperand(0).getValueType() == VT &&
18033            "Shuffle types don't match");
18034 
18035     SDValue SV0, SV1;
18036     SmallVector<int, 4> Mask;
18037     // Compute the combined shuffle mask for a shuffle with SV0 as the first
18038     // operand, and SV1 as the second operand.
18039     for (unsigned i = 0; i != NumElts; ++i) {
18040       int Idx = SVN->getMaskElt(i);
18041       if (Idx < 0) {
18042         // Propagate Undef.
18043         Mask.push_back(Idx);
18044         continue;
18045       }
18046 
18047       SDValue CurrentVec;
18048       if (Idx < (int)NumElts) {
18049         // This shuffle index refers to the inner shuffle N0. Lookup the inner
18050         // shuffle mask to identify which vector is actually referenced.
18051         Idx = OtherSV->getMaskElt(Idx);
18052         if (Idx < 0) {
18053           // Propagate Undef.
18054           Mask.push_back(Idx);
18055           continue;
18056         }
18057 
18058         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18059                                            : OtherSV->getOperand(1);
18060       } else {
18061         // This shuffle index references an element within N1.
18062         CurrentVec = N1;
18063       }
18064 
18065       // Simple case where 'CurrentVec' is UNDEF.
18066       if (CurrentVec.isUndef()) {
18067         Mask.push_back(-1);
18068         continue;
18069       }
18070 
18071       // Canonicalize the shuffle index. We don't know yet if CurrentVec
18072       // will be the first or second operand of the combined shuffle.
18073       Idx = Idx % NumElts;
18074       if (!SV0.getNode() || SV0 == CurrentVec) {
18075         // Ok. CurrentVec is the left hand side.
18076         // Update the mask accordingly.
18077         SV0 = CurrentVec;
18078         Mask.push_back(Idx);
18079         continue;
18080       }
18081 
18082       // Bail out if we cannot convert the shuffle pair into a single shuffle.
18083       if (SV1.getNode() && SV1 != CurrentVec)
18084         return SDValue();
18085 
18086       // Ok. CurrentVec is the right hand side.
18087       // Update the mask accordingly.
18088       SV1 = CurrentVec;
18089       Mask.push_back(Idx + NumElts);
18090     }
18091 
18092     // Check if all indices in Mask are Undef. In case, propagate Undef.
18093     bool isUndefMask = true;
18094     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
18095       isUndefMask &= Mask[i] < 0;
18096 
18097     if (isUndefMask)
18098       return DAG.getUNDEF(VT);
18099 
18100     if (!SV0.getNode())
18101       SV0 = DAG.getUNDEF(VT);
18102     if (!SV1.getNode())
18103       SV1 = DAG.getUNDEF(VT);
18104 
18105     // Avoid introducing shuffles with illegal mask.
18106     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
18107       ShuffleVectorSDNode::commuteMask(Mask);
18108 
18109       if (!TLI.isShuffleMaskLegal(Mask, VT))
18110         return SDValue();
18111 
18112       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
18113       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
18114       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
18115       std::swap(SV0, SV1);
18116     }
18117 
18118     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18119     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18120     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18121     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
18122   }
18123 
18124   return SDValue();
18125 }
18126 
18127 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
18128   SDValue InVal = N->getOperand(0);
18129   EVT VT = N->getValueType(0);
18130 
18131   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
18132   // with a VECTOR_SHUFFLE and possible truncate.
18133   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18134     SDValue InVec = InVal->getOperand(0);
18135     SDValue EltNo = InVal->getOperand(1);
18136     auto InVecT = InVec.getValueType();
18137     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
18138       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
18139       int Elt = C0->getZExtValue();
18140       NewMask[0] = Elt;
18141       SDValue Val;
18142       // If we have an implict truncate do truncate here as long as it's legal.
18143       // if it's not legal, this should
18144       if (VT.getScalarType() != InVal.getValueType() &&
18145           InVal.getValueType().isScalarInteger() &&
18146           isTypeLegal(VT.getScalarType())) {
18147         Val =
18148             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
18149         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
18150       }
18151       if (VT.getScalarType() == InVecT.getScalarType() &&
18152           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
18153           TLI.isShuffleMaskLegal(NewMask, VT)) {
18154         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
18155                                    DAG.getUNDEF(InVecT), NewMask);
18156         // If the initial vector is the correct size this shuffle is a
18157         // valid result.
18158         if (VT == InVecT)
18159           return Val;
18160         // If not we must truncate the vector.
18161         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
18162           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
18163           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
18164           EVT SubVT =
18165               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
18166                                VT.getVectorNumElements());
18167           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
18168                             ZeroIdx);
18169           return Val;
18170         }
18171       }
18172     }
18173   }
18174 
18175   return SDValue();
18176 }
18177 
18178 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
18179   EVT VT = N->getValueType(0);
18180   SDValue N0 = N->getOperand(0);
18181   SDValue N1 = N->getOperand(1);
18182   SDValue N2 = N->getOperand(2);
18183 
18184   // If inserting an UNDEF, just return the original vector.
18185   if (N1.isUndef())
18186     return N0;
18187 
18188   // If this is an insert of an extracted vector into an undef vector, we can
18189   // just use the input to the extract.
18190   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18191       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
18192     return N1.getOperand(0);
18193 
18194   // If we are inserting a bitcast value into an undef, with the same
18195   // number of elements, just use the bitcast input of the extract.
18196   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
18197   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
18198   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
18199       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18200       N1.getOperand(0).getOperand(1) == N2 &&
18201       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
18202           VT.getVectorNumElements() &&
18203       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
18204           VT.getSizeInBits()) {
18205     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
18206   }
18207 
18208   // If both N1 and N2 are bitcast values on which insert_subvector
18209   // would makes sense, pull the bitcast through.
18210   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
18211   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
18212   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
18213     SDValue CN0 = N0.getOperand(0);
18214     SDValue CN1 = N1.getOperand(0);
18215     EVT CN0VT = CN0.getValueType();
18216     EVT CN1VT = CN1.getValueType();
18217     if (CN0VT.isVector() && CN1VT.isVector() &&
18218         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
18219         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
18220       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
18221                                       CN0.getValueType(), CN0, CN1, N2);
18222       return DAG.getBitcast(VT, NewINSERT);
18223     }
18224   }
18225 
18226   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
18227   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
18228   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
18229   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
18230       N0.getOperand(1).getValueType() == N1.getValueType() &&
18231       N0.getOperand(2) == N2)
18232     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18233                        N1, N2);
18234 
18235   // Eliminate an intermediate insert into an undef vector:
18236   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18237   // insert_subvector undef, X, N2
18238   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18239       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18240     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18241                        N1.getOperand(1), N2);
18242 
18243   if (!isa<ConstantSDNode>(N2))
18244     return SDValue();
18245 
18246   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18247 
18248   // Canonicalize insert_subvector dag nodes.
18249   // Example:
18250   // (insert_subvector (insert_subvector A, Idx0), Idx1)
18251   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18252   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18253       N1.getValueType() == N0.getOperand(1).getValueType() &&
18254       isa<ConstantSDNode>(N0.getOperand(2))) {
18255     unsigned OtherIdx = N0.getConstantOperandVal(2);
18256     if (InsIdx < OtherIdx) {
18257       // Swap nodes.
18258       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18259                                   N0.getOperand(0), N1, N2);
18260       AddToWorklist(NewOp.getNode());
18261       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18262                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18263     }
18264   }
18265 
18266   // If the input vector is a concatenation, and the insert replaces
18267   // one of the pieces, we can optimize into a single concat_vectors.
18268   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18269       N0.getOperand(0).getValueType() == N1.getValueType()) {
18270     unsigned Factor = N1.getValueType().getVectorNumElements();
18271 
18272     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18273     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18274 
18275     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18276   }
18277 
18278   // Simplify source operands based on insertion.
18279   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18280     return SDValue(N, 0);
18281 
18282   return SDValue();
18283 }
18284 
18285 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18286   SDValue N0 = N->getOperand(0);
18287 
18288   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18289   if (N0->getOpcode() == ISD::FP16_TO_FP)
18290     return N0->getOperand(0);
18291 
18292   return SDValue();
18293 }
18294 
18295 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18296   SDValue N0 = N->getOperand(0);
18297 
18298   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18299   if (N0->getOpcode() == ISD::AND) {
18300     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18301     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18302       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18303                          N0.getOperand(0));
18304     }
18305   }
18306 
18307   return SDValue();
18308 }
18309 
18310 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
18311 /// with the destination vector and a zero vector.
18312 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
18313 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
18314 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18315   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18316 
18317   EVT VT = N->getValueType(0);
18318   SDValue LHS = N->getOperand(0);
18319   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18320   SDLoc DL(N);
18321 
18322   // Make sure we're not running after operation legalization where it
18323   // may have custom lowered the vector shuffles.
18324   if (LegalOperations)
18325     return SDValue();
18326 
18327   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18328     return SDValue();
18329 
18330   EVT RVT = RHS.getValueType();
18331   unsigned NumElts = RHS.getNumOperands();
18332 
18333   // Attempt to create a valid clear mask, splitting the mask into
18334   // sub elements and checking to see if each is
18335   // all zeros or all ones - suitable for shuffle masking.
18336   auto BuildClearMask = [&](int Split) {
18337     int NumSubElts = NumElts * Split;
18338     int NumSubBits = RVT.getScalarSizeInBits() / Split;
18339 
18340     SmallVector<int, 8> Indices;
18341     for (int i = 0; i != NumSubElts; ++i) {
18342       int EltIdx = i / Split;
18343       int SubIdx = i % Split;
18344       SDValue Elt = RHS.getOperand(EltIdx);
18345       if (Elt.isUndef()) {
18346         Indices.push_back(-1);
18347         continue;
18348       }
18349 
18350       APInt Bits;
18351       if (isa<ConstantSDNode>(Elt))
18352         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18353       else if (isa<ConstantFPSDNode>(Elt))
18354         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18355       else
18356         return SDValue();
18357 
18358       // Extract the sub element from the constant bit mask.
18359       if (DAG.getDataLayout().isBigEndian()) {
18360         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18361       } else {
18362         Bits.lshrInPlace(SubIdx * NumSubBits);
18363       }
18364 
18365       if (Split > 1)
18366         Bits = Bits.trunc(NumSubBits);
18367 
18368       if (Bits.isAllOnesValue())
18369         Indices.push_back(i);
18370       else if (Bits == 0)
18371         Indices.push_back(i + NumSubElts);
18372       else
18373         return SDValue();
18374     }
18375 
18376     // Let's see if the target supports this vector_shuffle.
18377     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18378     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18379     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18380       return SDValue();
18381 
18382     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18383     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18384                                                    DAG.getBitcast(ClearVT, LHS),
18385                                                    Zero, Indices));
18386   };
18387 
18388   // Determine maximum split level (byte level masking).
18389   int MaxSplit = 1;
18390   if (RVT.getScalarSizeInBits() % 8 == 0)
18391     MaxSplit = RVT.getScalarSizeInBits() / 8;
18392 
18393   for (int Split = 1; Split <= MaxSplit; ++Split)
18394     if (RVT.getScalarSizeInBits() % Split == 0)
18395       if (SDValue S = BuildClearMask(Split))
18396         return S;
18397 
18398   return SDValue();
18399 }
18400 
18401 /// Visit a binary vector operation, like ADD.
18402 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18403   assert(N->getValueType(0).isVector() &&
18404          "SimplifyVBinOp only works on vectors!");
18405 
18406   SDValue LHS = N->getOperand(0);
18407   SDValue RHS = N->getOperand(1);
18408   SDValue Ops[] = {LHS, RHS};
18409   EVT VT = N->getValueType(0);
18410 
18411   // See if we can constant fold the vector operation.
18412   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18413           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18414     return Fold;
18415 
18416   // Type legalization might introduce new shuffles in the DAG.
18417   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
18418   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
18419   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
18420       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
18421       LHS.getOperand(1).isUndef() &&
18422       RHS.getOperand(1).isUndef()) {
18423     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
18424     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
18425 
18426     if (SVN0->getMask().equals(SVN1->getMask())) {
18427       SDValue UndefVector = LHS.getOperand(1);
18428       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
18429                                      LHS.getOperand(0), RHS.getOperand(0),
18430                                      N->getFlags());
18431       AddUsersToWorklist(N);
18432       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
18433                                   SVN0->getMask());
18434     }
18435   }
18436 
18437   // The following pattern is likely to emerge with vector reduction ops. Moving
18438   // the binary operation ahead of insertion may allow using a narrower vector
18439   // instruction that has better performance than the wide version of the op:
18440   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
18441   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
18442       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
18443       LHS.getOperand(2) == RHS.getOperand(2) &&
18444       (LHS.hasOneUse() || RHS.hasOneUse())) {
18445     SDValue X = LHS.getOperand(1);
18446     SDValue Y = RHS.getOperand(1);
18447     SDValue Z = LHS.getOperand(2);
18448     EVT NarrowVT = X.getValueType();
18449     if (NarrowVT == Y.getValueType() &&
18450         TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), NarrowVT)) {
18451       // (binop undef, undef) may not return undef, so compute that result.
18452       SDLoc DL(N);
18453       SDValue VecC = DAG.getNode(N->getOpcode(), DL, VT, DAG.getUNDEF(VT),
18454                                  DAG.getUNDEF(VT));
18455       SDValue NarrowBO = DAG.getNode(N->getOpcode(), DL, NarrowVT, X, Y);
18456       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
18457     }
18458   }
18459 
18460   return SDValue();
18461 }
18462 
18463 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18464                                     SDValue N2) {
18465   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
18466 
18467   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18468                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
18469 
18470   // If we got a simplified select_cc node back from SimplifySelectCC, then
18471   // break it down into a new SETCC node, and a new SELECT node, and then return
18472   // the SELECT node, since we were called with a SELECT node.
18473   if (SCC.getNode()) {
18474     // Check to see if we got a select_cc back (to turn into setcc/select).
18475     // Otherwise, just return whatever node we got back, like fabs.
18476     if (SCC.getOpcode() == ISD::SELECT_CC) {
18477       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18478                                   N0.getValueType(),
18479                                   SCC.getOperand(0), SCC.getOperand(1),
18480                                   SCC.getOperand(4));
18481       AddToWorklist(SETCC.getNode());
18482       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18483                            SCC.getOperand(2), SCC.getOperand(3));
18484     }
18485 
18486     return SCC;
18487   }
18488   return SDValue();
18489 }
18490 
18491 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18492 /// being selected between, see if we can simplify the select.  Callers of this
18493 /// should assume that TheSelect is deleted if this returns true.  As such, they
18494 /// should return the appropriate thing (e.g. the node) back to the top-level of
18495 /// the DAG combiner loop to avoid it being looked at.
18496 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18497                                     SDValue RHS) {
18498   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18499   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18500   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18501     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18502       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18503       SDValue Sqrt = RHS;
18504       ISD::CondCode CC;
18505       SDValue CmpLHS;
18506       const ConstantFPSDNode *Zero = nullptr;
18507 
18508       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18509         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18510         CmpLHS = TheSelect->getOperand(0);
18511         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18512       } else {
18513         // SELECT or VSELECT
18514         SDValue Cmp = TheSelect->getOperand(0);
18515         if (Cmp.getOpcode() == ISD::SETCC) {
18516           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18517           CmpLHS = Cmp.getOperand(0);
18518           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18519         }
18520       }
18521       if (Zero && Zero->isZero() &&
18522           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18523           CC == ISD::SETULT || CC == ISD::SETLT)) {
18524         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18525         CombineTo(TheSelect, Sqrt);
18526         return true;
18527       }
18528     }
18529   }
18530   // Cannot simplify select with vector condition
18531   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
18532 
18533   // If this is a select from two identical things, try to pull the operation
18534   // through the select.
18535   if (LHS.getOpcode() != RHS.getOpcode() ||
18536       !LHS.hasOneUse() || !RHS.hasOneUse())
18537     return false;
18538 
18539   // If this is a load and the token chain is identical, replace the select
18540   // of two loads with a load through a select of the address to load from.
18541   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
18542   // constants have been dropped into the constant pool.
18543   if (LHS.getOpcode() == ISD::LOAD) {
18544     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
18545     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
18546 
18547     // Token chains must be identical.
18548     if (LHS.getOperand(0) != RHS.getOperand(0) ||
18549         // Do not let this transformation reduce the number of volatile loads.
18550         LLD->isVolatile() || RLD->isVolatile() ||
18551         // FIXME: If either is a pre/post inc/dec load,
18552         // we'd need to split out the address adjustment.
18553         LLD->isIndexed() || RLD->isIndexed() ||
18554         // If this is an EXTLOAD, the VT's must match.
18555         LLD->getMemoryVT() != RLD->getMemoryVT() ||
18556         // If this is an EXTLOAD, the kind of extension must match.
18557         (LLD->getExtensionType() != RLD->getExtensionType() &&
18558          // The only exception is if one of the extensions is anyext.
18559          LLD->getExtensionType() != ISD::EXTLOAD &&
18560          RLD->getExtensionType() != ISD::EXTLOAD) ||
18561         // FIXME: this discards src value information.  This is
18562         // over-conservative. It would be beneficial to be able to remember
18563         // both potential memory locations.  Since we are discarding
18564         // src value info, don't do the transformation if the memory
18565         // locations are not in the default address space.
18566         LLD->getPointerInfo().getAddrSpace() != 0 ||
18567         RLD->getPointerInfo().getAddrSpace() != 0 ||
18568         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
18569                                       LLD->getBasePtr().getValueType()))
18570       return false;
18571 
18572     // The loads must not depend on one another.
18573     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
18574       return false;
18575 
18576     // Check that the select condition doesn't reach either load.  If so,
18577     // folding this will induce a cycle into the DAG.  If not, this is safe to
18578     // xform, so create a select of the addresses.
18579 
18580     SmallPtrSet<const SDNode *, 32> Visited;
18581     SmallVector<const SDNode *, 16> Worklist;
18582 
18583     // Always fail if LLD and RLD are not independent. TheSelect is a
18584     // predecessor to all Nodes in question so we need not search past it.
18585 
18586     Visited.insert(TheSelect);
18587     Worklist.push_back(LLD);
18588     Worklist.push_back(RLD);
18589 
18590     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
18591         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
18592       return false;
18593 
18594     SDValue Addr;
18595     if (TheSelect->getOpcode() == ISD::SELECT) {
18596       // We cannot do this optimization if any pair of {RLD, LLD} is a
18597       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
18598       // Loads, we only need to check if CondNode is a successor to one of the
18599       // loads. We can further avoid this if there's no use of their chain
18600       // value.
18601       SDNode *CondNode = TheSelect->getOperand(0).getNode();
18602       Worklist.push_back(CondNode);
18603 
18604       if ((LLD->hasAnyUseOfValue(1) &&
18605            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18606           (RLD->hasAnyUseOfValue(1) &&
18607            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18608         return false;
18609 
18610       Addr = DAG.getSelect(SDLoc(TheSelect),
18611                            LLD->getBasePtr().getValueType(),
18612                            TheSelect->getOperand(0), LLD->getBasePtr(),
18613                            RLD->getBasePtr());
18614     } else {  // Otherwise SELECT_CC
18615       // We cannot do this optimization if any pair of {RLD, LLD} is a
18616       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
18617       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
18618       // one of the loads. We can further avoid this if there's no use of their
18619       // chain value.
18620 
18621       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
18622       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
18623       Worklist.push_back(CondLHS);
18624       Worklist.push_back(CondRHS);
18625 
18626       if ((LLD->hasAnyUseOfValue(1) &&
18627            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
18628           (RLD->hasAnyUseOfValue(1) &&
18629            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
18630         return false;
18631 
18632       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
18633                          LLD->getBasePtr().getValueType(),
18634                          TheSelect->getOperand(0),
18635                          TheSelect->getOperand(1),
18636                          LLD->getBasePtr(), RLD->getBasePtr(),
18637                          TheSelect->getOperand(4));
18638     }
18639 
18640     SDValue Load;
18641     // It is safe to replace the two loads if they have different alignments,
18642     // but the new load must be the minimum (most restrictive) alignment of the
18643     // inputs.
18644     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
18645     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
18646     if (!RLD->isInvariant())
18647       MMOFlags &= ~MachineMemOperand::MOInvariant;
18648     if (!RLD->isDereferenceable())
18649       MMOFlags &= ~MachineMemOperand::MODereferenceable;
18650     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
18651       // FIXME: Discards pointer and AA info.
18652       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
18653                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
18654                          MMOFlags);
18655     } else {
18656       // FIXME: Discards pointer and AA info.
18657       Load = DAG.getExtLoad(
18658           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
18659                                                   : LLD->getExtensionType(),
18660           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
18661           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
18662     }
18663 
18664     // Users of the select now use the result of the load.
18665     CombineTo(TheSelect, Load);
18666 
18667     // Users of the old loads now use the new load's chain.  We know the
18668     // old-load value is dead now.
18669     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
18670     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
18671     return true;
18672   }
18673 
18674   return false;
18675 }
18676 
18677 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
18678 /// bitwise 'and'.
18679 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
18680                                             SDValue N1, SDValue N2, SDValue N3,
18681                                             ISD::CondCode CC) {
18682   // If this is a select where the false operand is zero and the compare is a
18683   // check of the sign bit, see if we can perform the "gzip trick":
18684   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
18685   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
18686   EVT XType = N0.getValueType();
18687   EVT AType = N2.getValueType();
18688   if (!isNullConstant(N3) || !XType.bitsGE(AType))
18689     return SDValue();
18690 
18691   // If the comparison is testing for a positive value, we have to invert
18692   // the sign bit mask, so only do that transform if the target has a bitwise
18693   // 'and not' instruction (the invert is free).
18694   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
18695     // (X > -1) ? A : 0
18696     // (X >  0) ? X : 0 <-- This is canonical signed max.
18697     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
18698       return SDValue();
18699   } else if (CC == ISD::SETLT) {
18700     // (X <  0) ? A : 0
18701     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
18702     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
18703       return SDValue();
18704   } else {
18705     return SDValue();
18706   }
18707 
18708   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
18709   // constant.
18710   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
18711   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18712   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
18713     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
18714     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
18715     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
18716     AddToWorklist(Shift.getNode());
18717 
18718     if (XType.bitsGT(AType)) {
18719       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18720       AddToWorklist(Shift.getNode());
18721     }
18722 
18723     if (CC == ISD::SETGT)
18724       Shift = DAG.getNOT(DL, Shift, AType);
18725 
18726     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18727   }
18728 
18729   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
18730   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
18731   AddToWorklist(Shift.getNode());
18732 
18733   if (XType.bitsGT(AType)) {
18734     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
18735     AddToWorklist(Shift.getNode());
18736   }
18737 
18738   if (CC == ISD::SETGT)
18739     Shift = DAG.getNOT(DL, Shift, AType);
18740 
18741   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
18742 }
18743 
18744 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
18745 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
18746 /// in it. This may be a win when the constant is not otherwise available
18747 /// because it replaces two constant pool loads with one.
18748 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
18749     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
18750     ISD::CondCode CC) {
18751   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
18752     return SDValue();
18753 
18754   // If we are before legalize types, we want the other legalization to happen
18755   // first (for example, to avoid messing with soft float).
18756   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
18757   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
18758   EVT VT = N2.getValueType();
18759   if (!TV || !FV || !TLI.isTypeLegal(VT))
18760     return SDValue();
18761 
18762   // If a constant can be materialized without loads, this does not make sense.
18763   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
18764       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
18765       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
18766     return SDValue();
18767 
18768   // If both constants have multiple uses, then we won't need to do an extra
18769   // load. The values are likely around in registers for other users.
18770   if (!TV->hasOneUse() && !FV->hasOneUse())
18771     return SDValue();
18772 
18773   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
18774                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
18775   Type *FPTy = Elts[0]->getType();
18776   const DataLayout &TD = DAG.getDataLayout();
18777 
18778   // Create a ConstantArray of the two constants.
18779   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
18780   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
18781                                       TD.getPrefTypeAlignment(FPTy));
18782   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
18783 
18784   // Get offsets to the 0 and 1 elements of the array, so we can select between
18785   // them.
18786   SDValue Zero = DAG.getIntPtrConstant(0, DL);
18787   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
18788   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
18789   SDValue Cond =
18790       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
18791   AddToWorklist(Cond.getNode());
18792   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
18793   AddToWorklist(CstOffset.getNode());
18794   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
18795   AddToWorklist(CPIdx.getNode());
18796   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
18797                      MachinePointerInfo::getConstantPool(
18798                          DAG.getMachineFunction()), Alignment);
18799 }
18800 
18801 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
18802 /// where 'cond' is the comparison specified by CC.
18803 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
18804                                       SDValue N2, SDValue N3, ISD::CondCode CC,
18805                                       bool NotExtCompare) {
18806   // (x ? y : y) -> y.
18807   if (N2 == N3) return N2;
18808 
18809   EVT CmpOpVT = N0.getValueType();
18810   EVT VT = N2.getValueType();
18811   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
18812   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
18813   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
18814 
18815   // Determine if the condition we're dealing with is constant.
18816   SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
18817                               false);
18818   if (SCC.getNode()) AddToWorklist(SCC.getNode());
18819 
18820   if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
18821     // fold select_cc true, x, y -> x
18822     // fold select_cc false, x, y -> y
18823     return !SCCC->isNullValue() ? N2 : N3;
18824   }
18825 
18826   if (SDValue V =
18827           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
18828     return V;
18829 
18830   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
18831     return V;
18832 
18833   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
18834   // where y is has a single bit set.
18835   // A plaintext description would be, we can turn the SELECT_CC into an AND
18836   // when the condition can be materialized as an all-ones register.  Any
18837   // single bit-test can be materialized as an all-ones register with
18838   // shift-left and shift-right-arith.
18839   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
18840       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
18841     SDValue AndLHS = N0->getOperand(0);
18842     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
18843     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
18844       // Shift the tested bit over the sign bit.
18845       const APInt &AndMask = ConstAndRHS->getAPIntValue();
18846       SDValue ShlAmt =
18847         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
18848                         getShiftAmountTy(AndLHS.getValueType()));
18849       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
18850 
18851       // Now arithmetic right shift it all the way over, so the result is either
18852       // all-ones, or zero.
18853       SDValue ShrAmt =
18854         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
18855                         getShiftAmountTy(Shl.getValueType()));
18856       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
18857 
18858       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
18859     }
18860   }
18861 
18862   // fold select C, 16, 0 -> shl C, 4
18863   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
18864   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
18865 
18866   if ((Fold || Swap) &&
18867       TLI.getBooleanContents(CmpOpVT) ==
18868           TargetLowering::ZeroOrOneBooleanContent &&
18869       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
18870 
18871     if (Swap) {
18872       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
18873       std::swap(N2C, N3C);
18874     }
18875 
18876     // If the caller doesn't want us to simplify this into a zext of a compare,
18877     // don't do it.
18878     if (NotExtCompare && N2C->isOne())
18879       return SDValue();
18880 
18881     SDValue Temp, SCC;
18882     // zext (setcc n0, n1)
18883     if (LegalTypes) {
18884       SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
18885       if (VT.bitsLT(SCC.getValueType()))
18886         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
18887       else
18888         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18889     } else {
18890       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
18891       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
18892     }
18893 
18894     AddToWorklist(SCC.getNode());
18895     AddToWorklist(Temp.getNode());
18896 
18897     if (N2C->isOne())
18898       return Temp;
18899 
18900     // shl setcc result by log2 n2c
18901     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
18902                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
18903                                        SDLoc(Temp),
18904                                        getShiftAmountTy(Temp.getValueType())));
18905   }
18906 
18907   // Check to see if this is an integer abs.
18908   // select_cc setg[te] X,  0,  X, -X ->
18909   // select_cc setgt    X, -1,  X, -X ->
18910   // select_cc setl[te] X,  0, -X,  X ->
18911   // select_cc setlt    X,  1, -X,  X ->
18912   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
18913   if (N1C) {
18914     ConstantSDNode *SubC = nullptr;
18915     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
18916          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
18917         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
18918       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
18919     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
18920               (N1C->isOne() && CC == ISD::SETLT)) &&
18921              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
18922       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
18923 
18924     if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
18925       SDLoc DL(N0);
18926       SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
18927                                   DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
18928                                                   DL,
18929                                                   getShiftAmountTy(CmpOpVT)));
18930       SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
18931       AddToWorklist(Shift.getNode());
18932       AddToWorklist(Add.getNode());
18933       return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
18934     }
18935   }
18936 
18937   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
18938   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
18939   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
18940   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
18941   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
18942   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
18943   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
18944   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
18945   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
18946     SDValue ValueOnZero = N2;
18947     SDValue Count = N3;
18948     // If the condition is NE instead of E, swap the operands.
18949     if (CC == ISD::SETNE)
18950       std::swap(ValueOnZero, Count);
18951     // Check if the value on zero is a constant equal to the bits in the type.
18952     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
18953       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
18954         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
18955         // legal, combine to just cttz.
18956         if ((Count.getOpcode() == ISD::CTTZ ||
18957              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
18958             N0 == Count.getOperand(0) &&
18959             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
18960           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
18961         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
18962         // legal, combine to just ctlz.
18963         if ((Count.getOpcode() == ISD::CTLZ ||
18964              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
18965             N0 == Count.getOperand(0) &&
18966             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
18967           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
18968       }
18969     }
18970   }
18971 
18972   return SDValue();
18973 }
18974 
18975 /// This is a stub for TargetLowering::SimplifySetCC.
18976 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
18977                                    ISD::CondCode Cond, const SDLoc &DL,
18978                                    bool foldBooleans) {
18979   TargetLowering::DAGCombinerInfo
18980     DagCombineInfo(DAG, Level, false, this);
18981   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
18982 }
18983 
18984 /// Given an ISD::SDIV node expressing a divide by constant, return
18985 /// a DAG expression to select that will generate the same value by multiplying
18986 /// by a magic number.
18987 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
18988 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
18989   // when optimising for minimum size, we don't want to expand a div to a mul
18990   // and a shift.
18991   if (DAG.getMachineFunction().getFunction().optForMinSize())
18992     return SDValue();
18993 
18994   SmallVector<SDNode *, 8> Built;
18995   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
18996     for (SDNode *N : Built)
18997       AddToWorklist(N);
18998     return S;
18999   }
19000 
19001   return SDValue();
19002 }
19003 
19004 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
19005 /// DAG expression that will generate the same value by right shifting.
19006 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
19007   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
19008   if (!C)
19009     return SDValue();
19010 
19011   // Avoid division by zero.
19012   if (C->isNullValue())
19013     return SDValue();
19014 
19015   SmallVector<SDNode *, 8> Built;
19016   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
19017     for (SDNode *N : Built)
19018       AddToWorklist(N);
19019     return S;
19020   }
19021 
19022   return SDValue();
19023 }
19024 
19025 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
19026 /// expression that will generate the same value by multiplying by a magic
19027 /// number.
19028 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
19029 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
19030   // when optimising for minimum size, we don't want to expand a div to a mul
19031   // and a shift.
19032   if (DAG.getMachineFunction().getFunction().optForMinSize())
19033     return SDValue();
19034 
19035   SmallVector<SDNode *, 8> Built;
19036   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
19037     for (SDNode *N : Built)
19038       AddToWorklist(N);
19039     return S;
19040   }
19041 
19042   return SDValue();
19043 }
19044 
19045 /// Determines the LogBase2 value for a non-null input value using the
19046 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
19047 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
19048   EVT VT = V.getValueType();
19049   unsigned EltBits = VT.getScalarSizeInBits();
19050   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
19051   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
19052   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
19053   return LogBase2;
19054 }
19055 
19056 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19057 /// For the reciprocal, we need to find the zero of the function:
19058 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
19059 ///     =>
19060 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
19061 ///     does not require additional intermediate precision]
19062 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
19063   if (Level >= AfterLegalizeDAG)
19064     return SDValue();
19065 
19066   // TODO: Handle half and/or extended types?
19067   EVT VT = Op.getValueType();
19068   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19069     return SDValue();
19070 
19071   // If estimates are explicitly disabled for this function, we're done.
19072   MachineFunction &MF = DAG.getMachineFunction();
19073   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
19074   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19075     return SDValue();
19076 
19077   // Estimates may be explicitly enabled for this type with a custom number of
19078   // refinement steps.
19079   int Iterations = TLI.getDivRefinementSteps(VT, MF);
19080   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
19081     AddToWorklist(Est.getNode());
19082 
19083     if (Iterations) {
19084       EVT VT = Op.getValueType();
19085       SDLoc DL(Op);
19086       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
19087 
19088       // Newton iterations: Est = Est + Est (1 - Arg * Est)
19089       for (int i = 0; i < Iterations; ++i) {
19090         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
19091         AddToWorklist(NewEst.getNode());
19092 
19093         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
19094         AddToWorklist(NewEst.getNode());
19095 
19096         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19097         AddToWorklist(NewEst.getNode());
19098 
19099         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
19100         AddToWorklist(Est.getNode());
19101       }
19102     }
19103     return Est;
19104   }
19105 
19106   return SDValue();
19107 }
19108 
19109 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19110 /// For the reciprocal sqrt, we need to find the zero of the function:
19111 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19112 ///     =>
19113 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
19114 /// As a result, we precompute A/2 prior to the iteration loop.
19115 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
19116                                          unsigned Iterations,
19117                                          SDNodeFlags Flags, bool Reciprocal) {
19118   EVT VT = Arg.getValueType();
19119   SDLoc DL(Arg);
19120   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
19121 
19122   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
19123   // this entire sequence requires only one FP constant.
19124   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
19125   AddToWorklist(HalfArg.getNode());
19126 
19127   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
19128   AddToWorklist(HalfArg.getNode());
19129 
19130   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
19131   for (unsigned i = 0; i < Iterations; ++i) {
19132     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
19133     AddToWorklist(NewEst.getNode());
19134 
19135     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
19136     AddToWorklist(NewEst.getNode());
19137 
19138     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
19139     AddToWorklist(NewEst.getNode());
19140 
19141     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19142     AddToWorklist(Est.getNode());
19143   }
19144 
19145   // If non-reciprocal square root is requested, multiply the result by Arg.
19146   if (!Reciprocal) {
19147     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
19148     AddToWorklist(Est.getNode());
19149   }
19150 
19151   return Est;
19152 }
19153 
19154 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
19155 /// For the reciprocal sqrt, we need to find the zero of the function:
19156 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19157 ///     =>
19158 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
19159 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
19160                                          unsigned Iterations,
19161                                          SDNodeFlags Flags, bool Reciprocal) {
19162   EVT VT = Arg.getValueType();
19163   SDLoc DL(Arg);
19164   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
19165   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
19166 
19167   // This routine must enter the loop below to work correctly
19168   // when (Reciprocal == false).
19169   assert(Iterations > 0);
19170 
19171   // Newton iterations for reciprocal square root:
19172   // E = (E * -0.5) * ((A * E) * E + -3.0)
19173   for (unsigned i = 0; i < Iterations; ++i) {
19174     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
19175     AddToWorklist(AE.getNode());
19176 
19177     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
19178     AddToWorklist(AEE.getNode());
19179 
19180     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
19181     AddToWorklist(RHS.getNode());
19182 
19183     // When calculating a square root at the last iteration build:
19184     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
19185     // (notice a common subexpression)
19186     SDValue LHS;
19187     if (Reciprocal || (i + 1) < Iterations) {
19188       // RSQRT: LHS = (E * -0.5)
19189       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
19190     } else {
19191       // SQRT: LHS = (A * E) * -0.5
19192       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
19193     }
19194     AddToWorklist(LHS.getNode());
19195 
19196     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
19197     AddToWorklist(Est.getNode());
19198   }
19199 
19200   return Est;
19201 }
19202 
19203 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
19204 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
19205 /// Op can be zero.
19206 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
19207                                            bool Reciprocal) {
19208   if (Level >= AfterLegalizeDAG)
19209     return SDValue();
19210 
19211   // TODO: Handle half and/or extended types?
19212   EVT VT = Op.getValueType();
19213   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19214     return SDValue();
19215 
19216   // If estimates are explicitly disabled for this function, we're done.
19217   MachineFunction &MF = DAG.getMachineFunction();
19218   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
19219   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19220     return SDValue();
19221 
19222   // Estimates may be explicitly enabled for this type with a custom number of
19223   // refinement steps.
19224   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
19225 
19226   bool UseOneConstNR = false;
19227   if (SDValue Est =
19228       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
19229                           Reciprocal)) {
19230     AddToWorklist(Est.getNode());
19231 
19232     if (Iterations) {
19233       Est = UseOneConstNR
19234             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
19235             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
19236 
19237       if (!Reciprocal) {
19238         // The estimate is now completely wrong if the input was exactly 0.0 or
19239         // possibly a denormal. Force the answer to 0.0 for those cases.
19240         EVT VT = Op.getValueType();
19241         SDLoc DL(Op);
19242         EVT CCVT = getSetCCResultType(VT);
19243         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
19244         const Function &F = DAG.getMachineFunction().getFunction();
19245         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
19246         if (Denorms.getValueAsString().equals("ieee")) {
19247           // fabs(X) < SmallestNormal ? 0.0 : Est
19248           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
19249           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
19250           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
19251           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19252           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
19253           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
19254           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
19255           AddToWorklist(Fabs.getNode());
19256           AddToWorklist(IsDenorm.getNode());
19257           AddToWorklist(Est.getNode());
19258         } else {
19259           // X == 0.0 ? 0.0 : Est
19260           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19261           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
19262           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
19263           AddToWorklist(IsZero.getNode());
19264           AddToWorklist(Est.getNode());
19265         }
19266       }
19267     }
19268     return Est;
19269   }
19270 
19271   return SDValue();
19272 }
19273 
19274 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19275   return buildSqrtEstimateImpl(Op, Flags, true);
19276 }
19277 
19278 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19279   return buildSqrtEstimateImpl(Op, Flags, false);
19280 }
19281 
19282 /// Return true if there is any possibility that the two addresses overlap.
19283 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
19284   // If they are the same then they must be aliases.
19285   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
19286 
19287   // If they are both volatile then they cannot be reordered.
19288   if (Op0->isVolatile() && Op1->isVolatile()) return true;
19289 
19290   // If one operation reads from invariant memory, and the other may store, they
19291   // cannot alias. These should really be checking the equivalent of mayWrite,
19292   // but it only matters for memory nodes other than load /store.
19293   if (Op0->isInvariant() && Op1->writeMem())
19294     return false;
19295 
19296   if (Op1->isInvariant() && Op0->writeMem())
19297     return false;
19298 
19299   unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
19300   unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
19301 
19302   // Check for BaseIndexOffset matching.
19303   bool IsAlias;
19304   if (BaseIndexOffset::computeAliasing(
19305           BaseIndexOffset::match(Op0, DAG), NumBytes0,
19306           BaseIndexOffset::match(Op1, DAG), NumBytes1, DAG, IsAlias))
19307     return IsAlias;
19308 
19309   // If we know required SrcValue1 and SrcValue2 have relatively large
19310   // alignment compared to the size and offset of the access, we may be able
19311   // to prove they do not alias. This check is conservative for now to catch
19312   // cases created by splitting vector types.
19313   int64_t SrcValOffset0 = Op0->getSrcValueOffset();
19314   int64_t SrcValOffset1 = Op1->getSrcValueOffset();
19315   unsigned OrigAlignment0 = Op0->getOriginalAlignment();
19316   unsigned OrigAlignment1 = Op1->getOriginalAlignment();
19317   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
19318       NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
19319     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
19320     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
19321 
19322     // There is no overlap between these relatively aligned accesses of
19323     // similar size. Return no alias.
19324     if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
19325         (OffAlign1 + NumBytes1) <= OffAlign0)
19326       return false;
19327   }
19328 
19329   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
19330                    ? CombinerGlobalAA
19331                    : DAG.getSubtarget().useAA();
19332 #ifndef NDEBUG
19333   if (CombinerAAOnlyFunc.getNumOccurrences() &&
19334       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
19335     UseAA = false;
19336 #endif
19337 
19338   if (UseAA && AA &&
19339       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
19340     // Use alias analysis information.
19341     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
19342     int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
19343     int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
19344     AliasResult AAResult =
19345         AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
19346                                  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
19347                   MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
19348                                  UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
19349     if (AAResult == NoAlias)
19350       return false;
19351   }
19352 
19353   // Otherwise we have to assume they alias.
19354   return true;
19355 }
19356 
19357 /// Walk up chain skipping non-aliasing memory nodes,
19358 /// looking for aliasing nodes and adding them to the Aliases vector.
19359 void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
19360                                    SmallVectorImpl<SDValue> &Aliases) {
19361   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
19362   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
19363 
19364   // Get alias information for node.
19365   bool IsLoad = isa<LoadSDNode>(N) && !N->isVolatile();
19366 
19367   // Starting off.
19368   Chains.push_back(OriginalChain);
19369   unsigned Depth = 0;
19370 
19371   // Look at each chain and determine if it is an alias.  If so, add it to the
19372   // aliases list.  If not, then continue up the chain looking for the next
19373   // candidate.
19374   while (!Chains.empty()) {
19375     SDValue Chain = Chains.pop_back_val();
19376 
19377     // For TokenFactor nodes, look at each operand and only continue up the
19378     // chain until we reach the depth limit.
19379     //
19380     // FIXME: The depth check could be made to return the last non-aliasing
19381     // chain we found before we hit a tokenfactor rather than the original
19382     // chain.
19383     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
19384       Aliases.clear();
19385       Aliases.push_back(OriginalChain);
19386       return;
19387     }
19388 
19389     // Don't bother if we've been before.
19390     if (!Visited.insert(Chain.getNode()).second)
19391       continue;
19392 
19393     switch (Chain.getOpcode()) {
19394     case ISD::EntryToken:
19395       // Entry token is ideal chain operand, but handled in FindBetterChain.
19396       break;
19397 
19398     case ISD::LOAD:
19399     case ISD::STORE: {
19400       // Get alias information for Chain.
19401       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
19402           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
19403 
19404       // If chain is alias then stop here.
19405       if (!(IsLoad && IsOpLoad) &&
19406           isAlias(N, cast<LSBaseSDNode>(Chain.getNode()))) {
19407         Aliases.push_back(Chain);
19408       } else {
19409         // Look further up the chain.
19410         Chains.push_back(Chain.getOperand(0));
19411         ++Depth;
19412       }
19413       break;
19414     }
19415 
19416     case ISD::TokenFactor:
19417       // We have to check each of the operands of the token factor for "small"
19418       // token factors, so we queue them up.  Adding the operands to the queue
19419       // (stack) in reverse order maintains the original order and increases the
19420       // likelihood that getNode will find a matching token factor (CSE.)
19421       if (Chain.getNumOperands() > 16) {
19422         Aliases.push_back(Chain);
19423         break;
19424       }
19425       for (unsigned n = Chain.getNumOperands(); n;)
19426         Chains.push_back(Chain.getOperand(--n));
19427       ++Depth;
19428       break;
19429 
19430     case ISD::CopyFromReg:
19431       // Forward past CopyFromReg.
19432       Chains.push_back(Chain.getOperand(0));
19433       ++Depth;
19434       break;
19435 
19436     default:
19437       // For all other instructions we will just have to take what we can get.
19438       Aliases.push_back(Chain);
19439       break;
19440     }
19441   }
19442 }
19443 
19444 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
19445 /// (aliasing node.)
19446 SDValue DAGCombiner::FindBetterChain(LSBaseSDNode *N, SDValue OldChain) {
19447   if (OptLevel == CodeGenOpt::None)
19448     return OldChain;
19449 
19450   // Ops for replacing token factor.
19451   SmallVector<SDValue, 8> Aliases;
19452 
19453   // Accumulate all the aliases to this node.
19454   GatherAllAliases(N, OldChain, Aliases);
19455 
19456   // If no operands then chain to entry token.
19457   if (Aliases.size() == 0)
19458     return DAG.getEntryNode();
19459 
19460   // If a single operand then chain to it.  We don't need to revisit it.
19461   if (Aliases.size() == 1)
19462     return Aliases[0];
19463 
19464   // Construct a custom tailored token factor.
19465   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
19466 }
19467 
namespace {
// TODO: Replace with std::monostate when we move to C++17.
/// Stateless value type; every instance compares equal to every other one.
/// It is used below as the mapped type of an IntervalMap.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
19474 
19475 // This function tries to collect a bunch of potentially interesting
19476 // nodes to improve the chains of, all at once. This might seem
19477 // redundant, as this function gets called when visiting every store
19478 // node, so why not let the work be done on each store as it's visited?
19479 //
19480 // I believe this is mainly important because MergeConsecutiveStores
19481 // is unable to deal with merging stores of different sizes, so unless
19482 // we improve the chains of all the potential candidates up-front
19483 // before running MergeConsecutiveStores, it might only see some of
19484 // the nodes that will eventually be candidates, and then not be able
19485 // to go from a partially-merged state to the desired final
19486 // fully-merged state.
19487 
19488 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
19489   SmallVector<StoreSDNode *, 8> ChainedStores;
19490   StoreSDNode *STChain = St;
19491   // Intervals records which offsets from BaseIndex have been covered. In
19492   // the common case, every store writes to the immediately previous address
19493   // space and thus merged with the previous interval at insertion time.
19494 
19495   using IMap =
19496       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
19497   IMap::Allocator A;
19498   IMap Intervals(A);
19499 
19500   // This holds the base pointer, index, and the offset in bytes from the base
19501   // pointer.
19502   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19503 
19504   // We must have a base and an offset.
19505   if (!BasePtr.getBase().getNode())
19506     return false;
19507 
19508   // Do not handle stores to undef base pointers.
19509   if (BasePtr.getBase().isUndef())
19510     return false;
19511 
19512   // Add ST's interval.
19513   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
19514 
19515   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
19516     // If the chain has more than one use, then we can't reorder the mem ops.
19517     if (!SDValue(Chain, 0)->hasOneUse())
19518       break;
19519     if (Chain->isVolatile() || Chain->isIndexed())
19520       break;
19521 
19522     // Find the base pointer and offset for this memory node.
19523     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
19524     // Check that the base pointer is the same as the original one.
19525     int64_t Offset;
19526     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
19527       break;
19528     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
19529     // Make sure we don't overlap with other intervals by checking the ones to
19530     // the left or right before inserting.
19531     auto I = Intervals.find(Offset);
19532     // If there's a next interval, we should end before it.
19533     if (I != Intervals.end() && I.start() < (Offset + Length))
19534       break;
19535     // If there's a previous interval, we should start after it.
19536     if (I != Intervals.begin() && (--I).stop() <= Offset)
19537       break;
19538     Intervals.insert(Offset, Offset + Length, Unit);
19539 
19540     ChainedStores.push_back(Chain);
19541     STChain = Chain;
19542   }
19543 
19544   // If we didn't find a chained store, exit.
19545   if (ChainedStores.size() == 0)
19546     return false;
19547 
19548   // Improve all chained stores (St and ChainedStores members) starting from
19549   // where the store chain ended and return single TokenFactor.
19550   SDValue NewChain = STChain->getChain();
19551   SmallVector<SDValue, 8> TFOps;
19552   for (unsigned I = ChainedStores.size(); I;) {
19553     StoreSDNode *S = ChainedStores[--I];
19554     SDValue BetterChain = FindBetterChain(S, NewChain);
19555     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
19556         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
19557     TFOps.push_back(SDValue(S, 0));
19558     ChainedStores[I] = S;
19559   }
19560 
19561   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
19562   SDValue BetterChain = FindBetterChain(St, NewChain);
19563   SDValue NewST;
19564   if (St->isTruncatingStore())
19565     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
19566                               St->getBasePtr(), St->getMemoryVT(),
19567                               St->getMemOperand());
19568   else
19569     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
19570                          St->getBasePtr(), St->getMemOperand());
19571 
19572   TFOps.push_back(NewST);
19573 
19574   // If we improved every element of TFOps, then we've lost the dependence on
19575   // NewChain to successors of St and we need to add it back to TFOps. Do so at
19576   // the beginning to keep relative order consistent with FindBetterChains.
19577   auto hasImprovedChain = [&](SDValue ST) -> bool {
19578     return ST->getOperand(0) != NewChain;
19579   };
19580   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
19581   if (AddNewChain)
19582     TFOps.insert(TFOps.begin(), NewChain);
19583 
19584   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
19585   CombineTo(St, TF);
19586 
19587   AddToWorklist(STChain);
19588   // Add TF operands worklist in reverse order.
19589   for (auto I = TF->getNumOperands(); I;)
19590     AddToWorklist(TF->getOperand(--I).getNode());
19591   AddToWorklist(TF.getNode());
19592   return true;
19593 }
19594 
19595 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
19596   if (OptLevel == CodeGenOpt::None)
19597     return false;
19598 
19599   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
19600 
19601   // We must have a base and an offset.
19602   if (!BasePtr.getBase().getNode())
19603     return false;
19604 
19605   // Do not handle stores to undef base pointers.
19606   if (BasePtr.getBase().isUndef())
19607     return false;
19608 
19609   // Directly improve a chain of disjoint stores starting at St.
19610   if (parallelizeChainedStores(St))
19611     return true;
19612 
19613   // Improve St's Chain..
19614   SDValue BetterChain = FindBetterChain(St, St->getChain());
19615   if (St->getChain() != BetterChain) {
19616     replaceStoreChain(St, BetterChain);
19617     return true;
19618   }
19619   return false;
19620 }
19621 
19622 /// This is the entry point for the file.
19623 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
19624                            CodeGenOpt::Level OptLevel) {
19625   /// This is the main entry point to this class.
19626   DAGCombiner(*this, AA, OptLevel).Run(Level);
19627 }
19628