//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalOperations = false;
    bool LegalTypes = false;
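    /// True if this combine is running on a function that is marked to be
    /// optimized for size; initialized from the function's attributes in the
    /// constructor.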
    bool ForCodeSize;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. Because duplicate nodes are never
    /// added to the worklist, this can differ from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();

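      // Cache the widest store width the target can legally produce; store
      // combines (e.g. merging consecutive stores) use this as an upper
      // bound on the width of any store they create.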
      MaximumLegalStoreInBits = 0;
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

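      // WorklistMap records the node's index in Worklist so that
      // removeFromWorklist can null out the slot in O(1); only push the node
      // if this is its first time on the worklist.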
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
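    /// Width (in bits) of the largest legal store the target supports;
    /// computed once in the constructor.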
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      EVT VT = Op.getValueType();
      unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to \p MemVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

    /// Helper function for MergeConsecutiveStores which merges the
    /// component store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for MergeConsecutiveStores. When the
    /// source elements of the consecutive stores are all constants or
    /// all extracted vector elements, try to merge them into one
    /// larger store introducing bitcasts if necessary.  \return True
    /// if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for MergeConsecutiveStores. Stores
    /// that potentially may be merged with St are placed in
    /// StoreNodes. RootNode is a chain predecessor to all store
    /// candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// candidate stores have indirect dependency through their
    /// operands. RootNode is the predecessor to all stores calculated
    /// by getStoreMergeCandidates and is used to prune the dependency check.
    /// \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if any stores were merged (the affected nodes are stored
    /// as a prefix in \p StoreNodes).
    bool MergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation with an AND as its first operand. Other
    /// requirements are checked by the function (e.g. that the truncate is
    /// single-use); if they are not met, an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      if (LegalOperations)
        return TLI.isOperationLegal(Opcode, VT);
      return TLI.isOperationLegalOrCustom(Opcode, VT);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

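    /// After \p ExtLoad has been created as an extended version of
    /// \p OrigLoad, rewrite the setcc nodes in \p SetCCs so they compare the
    /// extended value, extending their other operands with \p ExtType where
    /// needed.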
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

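/// This class is a DAGUpdateListener that registers newly inserted nodes
/// with the combiner's pruning list, so nodes that end up with no uses are
/// cleaned up promptly.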
class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

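// These are thin forwarding wrappers: target-specific combine hooks only see
// the opaque DAGCombinerInfo, so these methods route requests to the
// underlying DAGCombiner instance.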
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression for
/// the same cost as the expression itself, or 2 if we can compute the negated
/// form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               bool ForCodeSize,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
      TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                       ForCodeSize);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  }
}

/// If isNegatibleForFree returns a non-zero value, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1));
  case ISD::FP_ROUND:
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1));
  }
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

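  // The constant true/false operands matched above are only meaningful when
  // the target defines how booleans are represented for this type.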
  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())
    return SDValue();

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, /*Depth=*/0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
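    // A plain (non-extending) load is promoted to an any-extending load of
    // the original memory type; loads that already extend keep their
    // extension kind.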
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
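    // Byte-sized constants are sign-extended and other widths are
    // zero-extended; in both cases the low bits of the promoted constant
    // match the original value.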
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial; e.g. on x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial, e.g. on x86 it is usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
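/// Only the shifted value is promoted; the shift amount keeps its own type.
/// The value is sign-extended for SRA, zero-extended for SRL, and
/// any-extended otherwise so the bits shifted in remain correct. For example
/// (a sketch, with VT == i16 and PVT == i32):
///   (i16 (srl x, y)) --> (i16 (trunc (i32 (srl (zext x), y))))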
1306 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1307   if (!LegalOperations)
1308     return SDValue();
1309 
1310   EVT VT = Op.getValueType();
1311   if (VT.isVector() || !VT.isInteger())
1312     return SDValue();
1313 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1316   unsigned Opc = Op.getOpcode();
1317   if (TLI.isTypeDesirableForOp(Opc, VT))
1318     return SDValue();
1319 
1320   EVT PVT = VT;
  // Ask the target whether it is a good idea to promote this operation and,
  // if so, what type to promote it to.
1323   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1324     assert(PVT != VT && "Don't know what type to promote to!");
1325 
1326     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1327 
1328     bool Replace = false;
1329     SDValue N0 = Op.getOperand(0);
1330     SDValue N1 = Op.getOperand(1);
1331     if (Opc == ISD::SRA)
1332       N0 = SExtPromoteOperand(N0, PVT);
1333     else if (Opc == ISD::SRL)
1334       N0 = ZExtPromoteOperand(N0, PVT);
1335     else
1336       N0 = PromoteOperand(N0, PVT, Replace);
1337 
1338     if (!N0.getNode())
1339       return SDValue();
1340 
1341     SDLoc DL(Op);
1342     SDValue RV =
1343         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1344 
1345     AddToWorklist(N0.getNode());
1346     if (Replace)
1347       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1348 
1349     // Deal with Op being deleted.
1350     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1351       return RV;
1352   }
1353   return SDValue();
1354 }
1355 
1356 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1357   if (!LegalOperations)
1358     return SDValue();
1359 
1360   EVT VT = Op.getValueType();
1361   if (VT.isVector() || !VT.isInteger())
1362     return SDValue();
1363 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1366   unsigned Opc = Op.getOpcode();
1367   if (TLI.isTypeDesirableForOp(Opc, VT))
1368     return SDValue();
1369 
1370   EVT PVT = VT;
  // Ask the target whether it is a good idea to promote this operation and,
  // if so, what type to promote it to.
1373   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1374     assert(PVT != VT && "Don't know what type to promote to!");
1375     // fold (aext (aext x)) -> (aext x)
1376     // fold (aext (zext x)) -> (zext x)
1377     // fold (aext (sext x)) -> (sext x)
1378     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1379     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1380   }
1381   return SDValue();
1382 }
1383 
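/// Promote an integer load of an 'undesirable' type by loading in the
/// promoted type instead, e.g. (a sketch, with VT == i16 and PVT == i32):
///   t1: i16,ch = load t0, ptr
/// becomes an extending load of the original i16 memory type plus a truncate:
///   t2: i32,ch = load<ext from i16> t0, ptr
///   t3: i16 = truncate t2
/// All value uses of t1 are redirected to t3, and chain uses to t2's chain.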
1384 bool DAGCombiner::PromoteLoad(SDValue Op) {
1385   if (!LegalOperations)
1386     return false;
1387 
1388   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1389     return false;
1390 
1391   EVT VT = Op.getValueType();
1392   if (VT.isVector() || !VT.isInteger())
1393     return false;
1394 
  // If the operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1397   unsigned Opc = Op.getOpcode();
1398   if (TLI.isTypeDesirableForOp(Opc, VT))
1399     return false;
1400 
1401   EVT PVT = VT;
  // Ask the target whether it is a good idea to promote this operation and,
  // if so, what type to promote it to.
1404   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1405     assert(PVT != VT && "Don't know what type to promote to!");
1406 
1407     SDLoc DL(Op);
1408     SDNode *N = Op.getNode();
1409     LoadSDNode *LD = cast<LoadSDNode>(N);
1410     EVT MemVT = LD->getMemoryVT();
1411     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1412                                                       : LD->getExtensionType();
1413     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1414                                    LD->getChain(), LD->getBasePtr(),
1415                                    MemVT, LD->getMemOperand());
1416     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1417 
1418     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1419                Result.getNode()->dump(&DAG); dbgs() << '\n');
1420     WorklistRemover DeadNodes(*this);
1421     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1422     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1423     deleteAndRecombine(N);
1424     AddToWorklist(Result.getNode());
1425     return true;
1426   }
1427   return false;
1428 }
1429 
/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1436 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1437   if (!N->use_empty())
1438     return false;
1439 
1440   SmallSetVector<SDNode *, 16> Nodes;
1441   Nodes.insert(N);
1442   do {
1443     N = Nodes.pop_back_val();
1444     if (!N)
1445       continue;
1446 
1447     if (N->use_empty()) {
1448       for (const SDValue &ChildN : N->op_values())
1449         Nodes.insert(ChildN.getNode());
1450 
1451       removeFromWorklist(N);
1452       DAG.DeleteNode(N);
1453     } else {
1454       AddToWorklist(N);
1455     }
1456   } while (!Nodes.empty());
1457   return true;
1458 }
1459 
1460 //===----------------------------------------------------------------------===//
1461 //  Main DAG Combiner implementation
1462 //===----------------------------------------------------------------------===//
1463 
1464 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables so that the various visit routines can use
  // them.
1466   Level = AtLevel;
1467   LegalOperations = Level >= AfterLegalizeVectorOps;
1468   LegalTypes = Level >= AfterLegalizeTypes;
1469 
1470   WorklistInserter AddNodes(*this);
1471 
1472   // Add all the dag nodes to the worklist.
1473   for (SDNode &Node : DAG.allnodes())
1474     AddToWorklist(&Node);
1475 
  // Create a dummy node (which is not added to allnodes) that adds a reference
  // to the root node, preventing it from being deleted and tracking any
  // changes to the root.
1479   HandleSDNode Dummy(DAG.getRoot());
1480 
1481   // While we have a valid worklist entry node, try to combine it.
1482   while (SDNode *N = getNextWorklistEntry()) {
1483     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1484     // N is deleted from the DAG, since they too may now be dead or may have a
1485     // reduced number of uses, allowing other xforms.
1486     if (recursivelyDeleteUnusedNodes(N))
1487       continue;
1488 
1489     WorklistRemover DeadNodes(*this);
1490 
1491     // If this combine is running after legalizing the DAG, re-legalize any
1492     // nodes pulled off the worklist.
1493     if (Level == AfterLegalizeDAG) {
1494       SmallSetVector<SDNode *, 16> UpdatedNodes;
1495       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1496 
1497       for (SDNode *LN : UpdatedNodes) {
1498         AddToWorklist(LN);
1499         AddUsersToWorklist(LN);
1500       }
1501       if (!NIsValid)
1502         continue;
1503     }
1504 
1505     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1506 
    // Add any operands of this node that have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
1510     CombinedNodes.insert(N);
1511     for (const SDValue &ChildN : N->op_values())
1512       if (!CombinedNodes.count(ChildN.getNode()))
1513         AddToWorklist(ChildN.getNode());
1514 
1515     SDValue RV = combine(N);
1516 
1517     if (!RV.getNode())
1518       continue;
1519 
1520     ++NodesCombined;
1521 
1522     // If we get back the same node we passed in, rather than a new node or
1523     // zero, we know that the node must have defined multiple values and
1524     // CombineTo was used.  Since CombineTo takes care of the worklist
1525     // mechanics for us, we have no work to do in this case.
1526     if (RV.getNode() == N)
1527       continue;
1528 
1529     assert(N->getOpcode() != ISD::DELETED_NODE &&
1530            RV.getOpcode() != ISD::DELETED_NODE &&
1531            "Node was deleted but visit returned new node!");
1532 
1533     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1534 
1535     if (N->getNumValues() == RV.getNode()->getNumValues())
1536       DAG.ReplaceAllUsesWith(N, RV.getNode());
1537     else {
1538       assert(N->getValueType(0) == RV.getValueType() &&
1539              N->getNumValues() == 1 && "Type mismatch");
1540       DAG.ReplaceAllUsesWith(N, &RV);
1541     }
1542 
1543     // Push the new node and any users onto the worklist
1544     AddToWorklist(RV.getNode());
1545     AddUsersToWorklist(RV.getNode());
1546 
1547     // Finally, if the node is now dead, remove it from the graph.  The node
1548     // may not be dead if the replacement process recursively simplified to
1549     // something else needing this node. This will also take care of adding any
1550     // operands which have lost a user to the worklist.
1551     recursivelyDeleteUnusedNodes(N);
1552   }
1553 
  // If the root changed (e.g. it was a dead load), update the root.
1555   DAG.setRoot(Dummy.getValue());
1556   DAG.RemoveDeadNodes();
1557 }
1558 
1559 SDValue DAGCombiner::visit(SDNode *N) {
1560   switch (N->getOpcode()) {
1561   default: break;
1562   case ISD::TokenFactor:        return visitTokenFactor(N);
1563   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1564   case ISD::ADD:                return visitADD(N);
1565   case ISD::SUB:                return visitSUB(N);
1566   case ISD::SADDSAT:
1567   case ISD::UADDSAT:            return visitADDSAT(N);
1568   case ISD::SSUBSAT:
1569   case ISD::USUBSAT:            return visitSUBSAT(N);
1570   case ISD::ADDC:               return visitADDC(N);
1571   case ISD::SADDO:
1572   case ISD::UADDO:              return visitADDO(N);
1573   case ISD::SUBC:               return visitSUBC(N);
1574   case ISD::SSUBO:
1575   case ISD::USUBO:              return visitSUBO(N);
1576   case ISD::ADDE:               return visitADDE(N);
1577   case ISD::ADDCARRY:           return visitADDCARRY(N);
1578   case ISD::SUBE:               return visitSUBE(N);
1579   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1580   case ISD::MUL:                return visitMUL(N);
1581   case ISD::SDIV:               return visitSDIV(N);
1582   case ISD::UDIV:               return visitUDIV(N);
1583   case ISD::SREM:
1584   case ISD::UREM:               return visitREM(N);
1585   case ISD::MULHU:              return visitMULHU(N);
1586   case ISD::MULHS:              return visitMULHS(N);
1587   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1588   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1589   case ISD::SMULO:
1590   case ISD::UMULO:              return visitMULO(N);
1591   case ISD::SMIN:
1592   case ISD::SMAX:
1593   case ISD::UMIN:
1594   case ISD::UMAX:               return visitIMINMAX(N);
1595   case ISD::AND:                return visitAND(N);
1596   case ISD::OR:                 return visitOR(N);
1597   case ISD::XOR:                return visitXOR(N);
1598   case ISD::SHL:                return visitSHL(N);
1599   case ISD::SRA:                return visitSRA(N);
1600   case ISD::SRL:                return visitSRL(N);
1601   case ISD::ROTR:
1602   case ISD::ROTL:               return visitRotate(N);
1603   case ISD::FSHL:
1604   case ISD::FSHR:               return visitFunnelShift(N);
1605   case ISD::ABS:                return visitABS(N);
1606   case ISD::BSWAP:              return visitBSWAP(N);
1607   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1608   case ISD::CTLZ:               return visitCTLZ(N);
1609   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1610   case ISD::CTTZ:               return visitCTTZ(N);
1611   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1612   case ISD::CTPOP:              return visitCTPOP(N);
1613   case ISD::SELECT:             return visitSELECT(N);
1614   case ISD::VSELECT:            return visitVSELECT(N);
1615   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1616   case ISD::SETCC:              return visitSETCC(N);
1617   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1618   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1619   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1620   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1621   case ISD::AssertSext:
1622   case ISD::AssertZext:         return visitAssertExt(N);
1623   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1624   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1625   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1626   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1627   case ISD::BITCAST:            return visitBITCAST(N);
1628   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1629   case ISD::FADD:               return visitFADD(N);
1630   case ISD::FSUB:               return visitFSUB(N);
1631   case ISD::FMUL:               return visitFMUL(N);
1632   case ISD::FMA:                return visitFMA(N);
1633   case ISD::FDIV:               return visitFDIV(N);
1634   case ISD::FREM:               return visitFREM(N);
1635   case ISD::FSQRT:              return visitFSQRT(N);
1636   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1637   case ISD::FPOW:               return visitFPOW(N);
1638   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1639   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1640   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1641   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1642   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1643   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1644   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1645   case ISD::FNEG:               return visitFNEG(N);
1646   case ISD::FABS:               return visitFABS(N);
1647   case ISD::FFLOOR:             return visitFFLOOR(N);
1648   case ISD::FMINNUM:            return visitFMINNUM(N);
1649   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1650   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1651   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1652   case ISD::FCEIL:              return visitFCEIL(N);
1653   case ISD::FTRUNC:             return visitFTRUNC(N);
1654   case ISD::BRCOND:             return visitBRCOND(N);
1655   case ISD::BR_CC:              return visitBR_CC(N);
1656   case ISD::LOAD:               return visitLOAD(N);
1657   case ISD::STORE:              return visitSTORE(N);
1658   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1659   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1660   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1661   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1662   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1663   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1664   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1665   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1666   case ISD::MGATHER:            return visitMGATHER(N);
1667   case ISD::MLOAD:              return visitMLOAD(N);
1668   case ISD::MSCATTER:           return visitMSCATTER(N);
1669   case ISD::MSTORE:             return visitMSTORE(N);
1670   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1671   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1672   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1673   case ISD::VECREDUCE_FADD:
1674   case ISD::VECREDUCE_FMUL:
1675   case ISD::VECREDUCE_ADD:
1676   case ISD::VECREDUCE_MUL:
1677   case ISD::VECREDUCE_AND:
1678   case ISD::VECREDUCE_OR:
1679   case ISD::VECREDUCE_XOR:
1680   case ISD::VECREDUCE_SMAX:
1681   case ISD::VECREDUCE_SMIN:
1682   case ISD::VECREDUCE_UMAX:
1683   case ISD::VECREDUCE_UMIN:
1684   case ISD::VECREDUCE_FMAX:
1685   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1686   }
1687   return SDValue();
1688 }
1689 
1690 SDValue DAGCombiner::combine(SDNode *N) {
1691   SDValue RV = visit(N);
1692 
1693   // If nothing happened, try a target-specific DAG combine.
1694   if (!RV.getNode()) {
1695     assert(N->getOpcode() != ISD::DELETED_NODE &&
1696            "Node was deleted but visit returned NULL!");
1697 
1698     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1699         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1700 
1701       // Expose the DAG combiner to the target combiner impls.
1702       TargetLowering::DAGCombinerInfo
1703         DagCombineInfo(DAG, Level, false, this);
1704 
1705       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1706     }
1707   }
1708 
1709   // If nothing happened still, try promoting the operation.
1710   if (!RV.getNode()) {
1711     switch (N->getOpcode()) {
1712     default: break;
1713     case ISD::ADD:
1714     case ISD::SUB:
1715     case ISD::MUL:
1716     case ISD::AND:
1717     case ISD::OR:
1718     case ISD::XOR:
1719       RV = PromoteIntBinOp(SDValue(N, 0));
1720       break;
1721     case ISD::SHL:
1722     case ISD::SRA:
1723     case ISD::SRL:
1724       RV = PromoteIntShiftOp(SDValue(N, 0));
1725       break;
1726     case ISD::SIGN_EXTEND:
1727     case ISD::ZERO_EXTEND:
1728     case ISD::ANY_EXTEND:
1729       RV = PromoteExtend(SDValue(N, 0));
1730       break;
1731     case ISD::LOAD:
1732       if (PromoteLoad(SDValue(N, 0)))
1733         RV = SDValue(N, 0);
1734       break;
1735     }
1736   }
1737 
1738   // If N is a commutative binary node, try to eliminate it if the commuted
1739   // version is already present in the DAG.
1740   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1741       N->getNumValues() == 1) {
1742     SDValue N0 = N->getOperand(0);
1743     SDValue N1 = N->getOperand(1);
1744 
1745     // Constant operands are canonicalized to RHS.
1746     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1747       SDValue Ops[] = {N1, N0};
1748       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1749                                             N->getFlags());
1750       if (CSENode)
1751         return SDValue(CSENode, 0);
1752     }
1753   }
1754 
1755   return RV;
1756 }
1757 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
1760 static SDValue getInputChainForNode(SDNode *N) {
1761   if (unsigned NumOps = N->getNumOperands()) {
1762     if (N->getOperand(0).getValueType() == MVT::Other)
1763       return N->getOperand(0);
1764     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1765       return N->getOperand(NumOps-1);
1766     for (unsigned i = 1; i < NumOps-1; ++i)
1767       if (N->getOperand(i).getValueType() == MVT::Other)
1768         return N->getOperand(i);
1769   }
1770   return SDValue();
1771 }
1772 
1773 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1774   // If N has two operands, where one has an input chain equal to the other,
1775   // the 'other' chain is redundant.
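  // For example (a sketch): given
  //   t2: ch = TokenFactor (t1: ch = load t0, ptr), t0
  // the load already chains on t0, so the token factor reduces to t1.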
1776   if (N->getNumOperands() == 2) {
1777     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1778       return N->getOperand(0);
1779     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1780       return N->getOperand(1);
1781   }
1782 
1783   // Don't simplify token factors if optnone.
1784   if (OptLevel == CodeGenOpt::None)
1785     return SDValue();
1786 
1787   // If the sole user is a token factor, we should make sure we have a
1788   // chance to merge them together. This prevents TF chains from inhibiting
1789   // optimizations.
1790   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1791     AddToWorklist(*(N->use_begin()));
1792 
1793   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1794   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1795   SmallPtrSet<SDNode*, 16> SeenOps;
1796   bool Changed = false;             // If we should replace this token factor.
1797 
1798   // Start out with this token factor.
1799   TFs.push_back(N);
1800 
  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
1803   for (unsigned i = 0; i < TFs.size(); ++i) {
1804     SDNode *TF = TFs[i];
1805 
1806     // Check each of the operands.
1807     for (const SDValue &Op : TF->op_values()) {
1808       switch (Op.getOpcode()) {
1809       case ISD::EntryToken:
1810         // Entry tokens don't need to be added to the list. They are
1811         // redundant.
1812         Changed = true;
1813         break;
1814 
1815       case ISD::TokenFactor:
1816         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1817           // Queue up for processing.
1818           TFs.push_back(Op.getNode());
1819           // Clean up in case the token factor is removed.
1820           AddToWorklist(Op.getNode());
1821           Changed = true;
1822           break;
1823         }
1824         LLVM_FALLTHROUGH;
1825 
1826       default:
1827         // Only add if it isn't already in the list.
1828         if (SeenOps.insert(Op.getNode()).second)
1829           Ops.push_back(Op);
1830         else
1831           Changed = true;
1832         break;
1833       }
1834     }
1835   }
1836 
  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can
  // exit early if we find all remaining work is associated with just one
  // operand as no further pruning is possible.
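  // For example (a sketch): given
  //   t3: ch = TokenFactor t1, t2
  // where t2: ch = store t1, ..., the walk from t2 reaches t1, so t1 is
  // already ordered before t2 and can be pruned from the operand list.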
1842 
1843   // List of nodes to search through and original Ops from which they originate.
1844   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1845   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1846   SmallPtrSet<SDNode *, 16> SeenChains;
1847   bool DidPruneOps = false;
1848 
1849   unsigned NumLeftToConsider = 0;
1850   for (const SDValue &Op : Ops) {
1851     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1852     OpWorkCount.push_back(1);
1853   }
1854 
1855   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove it from the list. Re-mark any
    // search associated with it as coming from the current OpNumber.
1858     if (SeenOps.count(Op) != 0) {
1859       Changed = true;
1860       DidPruneOps = true;
1861       unsigned OrigOpNumber = 0;
1862       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1863         OrigOpNumber++;
1864       assert((OrigOpNumber != Ops.size()) &&
1865              "expected to find TokenFactor Operand");
1866       // Re-mark worklist from OrigOpNumber to OpNumber
1867       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1868         if (Worklist[i].second == OrigOpNumber) {
1869           Worklist[i].second = OpNumber;
1870         }
1871       }
1872       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1873       OpWorkCount[OrigOpNumber] = 0;
1874       NumLeftToConsider--;
1875     }
1876     // Add if it's a new chain
1877     if (SeenChains.insert(Op).second) {
1878       OpWorkCount[OpNumber]++;
1879       Worklist.push_back(std::make_pair(Op, OpNumber));
1880     }
1881   };
1882 
1883   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
1885     if (NumLeftToConsider <= 1)
1886       break;
1887     auto CurNode = Worklist[i].first;
1888     auto CurOpNumber = Worklist[i].second;
1889     assert((OpWorkCount[CurOpNumber] > 0) &&
1890            "Node should not appear in worklist");
1891     switch (CurNode->getOpcode()) {
1892     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand considered.
1897       NumLeftToConsider++;
1898       break;
1899     case ISD::TokenFactor:
1900       for (const SDValue &Op : CurNode->op_values())
1901         AddToWorklist(i, Op.getNode(), CurOpNumber);
1902       break;
1903     case ISD::LIFETIME_START:
1904     case ISD::LIFETIME_END:
1905     case ISD::CopyFromReg:
1906     case ISD::CopyToReg:
1907       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1908       break;
1909     default:
1910       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1911         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1912       break;
1913     }
1914     OpWorkCount[CurOpNumber]--;
1915     if (OpWorkCount[CurOpNumber] == 0)
1916       NumLeftToConsider--;
1917   }
1918 
1919   // If we've changed things around then replace token factor.
1920   if (Changed) {
1921     SDValue Result;
1922     if (Ops.empty()) {
1923       // The entry token is the only possible outcome.
1924       Result = DAG.getEntryNode();
1925     } else {
1926       if (DidPruneOps) {
1927         SmallVector<SDValue, 8> PrunedOps;
1928         //
1929         for (const SDValue &Op : Ops) {
1930           if (SeenChains.count(Op.getNode()) == 0)
1931             PrunedOps.push_back(Op);
1932         }
1933         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1934       } else {
1935         Result = DAG.getTokenFactor(SDLoc(N), Ops);
1936       }
1937     }
1938     return Result;
1939   }
1940   return SDValue();
1941 }
1942 
1943 /// MERGE_VALUES can always be eliminated.
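/// For example (a sketch):
///   t1, t2 = merge_values a, b
/// is removed by replacing every use of t1 with a and every use of t2 with b.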
1944 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1945   WorklistRemover DeadNodes(*this);
1946   // Replacing results may cause a different MERGE_VALUES to suddenly
1947   // be CSE'd with N, and carry its uses with it. Iterate until no
1948   // uses remain, to ensure that the node can be safely deleted.
1949   // First add the users of this node to the work list so that they
1950   // can be tried again once they have new operands.
1951   AddUsersToWorklist(N);
1952   do {
1953     // Do as a single replacement to avoid rewalking use lists.
1954     SmallVector<SDValue, 8> Ops;
1955     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1956       Ops.push_back(N->getOperand(i));
1957     DAG.ReplaceAllUsesWith(N, Ops.data());
1958   } while (!N->use_empty());
1959   deleteAndRecombine(N);
1960   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1961 }
1962 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer, else nullptr.
1965 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1966   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1967   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1968 }
1969 
1970 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1971   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1972          "Unexpected binary operator");
1973 
1974   // Don't do this unless the old select is going away. We want to eliminate the
1975   // binary operator, not replace a binop with a select.
1976   // TODO: Handle ISD::SELECT_CC.
1977   unsigned SelOpNo = 0;
1978   SDValue Sel = BO->getOperand(0);
1979   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1980     SelOpNo = 1;
1981     Sel = BO->getOperand(1);
1982   }
1983 
1984   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1985     return SDValue();
1986 
1987   SDValue CT = Sel.getOperand(1);
1988   if (!isConstantOrConstantVector(CT, true) &&
1989       !isConstantFPBuildVectorOrConstantFP(CT))
1990     return SDValue();
1991 
1992   SDValue CF = Sel.getOperand(2);
1993   if (!isConstantOrConstantVector(CF, true) &&
1994       !isConstantFPBuildVectorOrConstantFP(CF))
1995     return SDValue();
1996 
  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1, in which case we can
  // propagate non-constant operands into the select. I.e.:
2000   // and (select Cond, 0, -1), X --> select Cond, 0, X
2001   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2002   auto BinOpcode = BO->getOpcode();
2003   bool CanFoldNonConst =
2004       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2005       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2006       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2007 
2008   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2009   if (!CanFoldNonConst &&
2010       !isConstantOrConstantVector(CBO, true) &&
2011       !isConstantFPBuildVectorOrConstantFP(CBO))
2012     return SDValue();
2013 
2014   EVT VT = Sel.getValueType();
2015 
  // In the case of a shift, the value and the shift amount may have different
  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
  // type. Bail out if we have swapped operands and the value types do not
  // match. NB: x86 is fine if the operands are not swapped and the shift
  // amount VT is no bigger than the shifted value's VT.
  // TODO: it is possible to check for a shift operation, correct the VTs, and
  // still perform the optimization on x86 if needed.
2022   if (SelOpNo && VT != CBO.getValueType())
2023     return SDValue();
2024 
2025   // We have a select-of-constants followed by a binary operator with a
2026   // constant. Eliminate the binop by pulling the constant math into the select.
2027   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2028   SDLoc DL(Sel);
2029   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2030                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2031   if (!CanFoldNonConst && !NewCT.isUndef() &&
2032       !isConstantOrConstantVector(NewCT, true) &&
2033       !isConstantFPBuildVectorOrConstantFP(NewCT))
2034     return SDValue();
2035 
2036   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2037                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2038   if (!CanFoldNonConst && !NewCF.isUndef() &&
2039       !isConstantOrConstantVector(NewCF, true) &&
2040       !isConstantFPBuildVectorOrConstantFP(NewCF))
2041     return SDValue();
2042 
2043   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2044 }
2045 
2046 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2047   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2048          "Expecting add or sub");
2049 
2050   // Match a constant operand and a zext operand for the math instruction:
2051   // add Z, C
2052   // sub C, Z
2053   bool IsAdd = N->getOpcode() == ISD::ADD;
2054   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2055   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2056   auto *CN = dyn_cast<ConstantSDNode>(C);
2057   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2058     return SDValue();
2059 
2060   // Match the zext operand as a setcc of a boolean.
2061   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2062       Z.getOperand(0).getValueType() != MVT::i1)
2063     return SDValue();
2064 
2065   // Match the compare as: setcc (X & 1), 0, eq.
2066   SDValue SetCC = Z.getOperand(0);
2067   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2068   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2069       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2070       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2071     return SDValue();
2072 
2073   // We are adding/subtracting a constant and an inverted low bit. Turn that
2074   // into a subtract/add of the low bit with incremented/decremented constant:
2075   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2076   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2077   EVT VT = C.getValueType();
2078   SDLoc DL(N);
2079   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2080   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2081                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2082   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2083 }
2084 
/// Try to fold an add/sub of a constant with a logically-shifted 'not' of the
/// sign bit into a shift and an add with a different constant.
2087 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2088   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2089          "Expecting add or sub");
2090 
2091   // We need a constant operand for the add/sub, and the other operand is a
2092   // logical shift right: add (srl), C or sub C, (srl).
2093   bool IsAdd = N->getOpcode() == ISD::ADD;
2094   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2095   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2096   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2097   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2098     return SDValue();
2099 
2100   // The shift must be of a 'not' value.
2101   SDValue Not = ShiftOp.getOperand(0);
2102   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2103     return SDValue();
2104 
2105   // The shift must be moving the sign bit to the least-significant-bit.
2106   EVT VT = ShiftOp.getValueType();
2107   SDValue ShAmt = ShiftOp.getOperand(1);
2108   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2109   if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2110     return SDValue();
2111 
2112   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2113   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2114   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2115   SDLoc DL(N);
2116   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2117   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2118   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2119   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2120 }
2121 
2122 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2123 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2124 /// are no common bits set in the operands).
2125 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2126   SDValue N0 = N->getOperand(0);
2127   SDValue N1 = N->getOperand(1);
2128   EVT VT = N0.getValueType();
2129   SDLoc DL(N);
2130 
2131   // fold vector ops
2132   if (VT.isVector()) {
2133     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2134       return FoldedVOp;
2135 
2136     // fold (add x, 0) -> x, vector edition
2137     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2138       return N0;
2139     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2140       return N1;
2141   }
2142 
2143   // fold (add x, undef) -> undef
2144   if (N0.isUndef())
2145     return N0;
2146 
2147   if (N1.isUndef())
2148     return N1;
2149 
2150   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2151     // canonicalize constant to RHS
2152     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2153       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2154     // fold (add c1, c2) -> c1+c2
2155     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2156                                       N1.getNode());
2157   }
2158 
2159   // fold (add x, 0) -> x
2160   if (isNullConstant(N1))
2161     return N0;
2162 
2163   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2164     // fold ((A-c1)+c2) -> (A+(c2-c1))
2165     if (N0.getOpcode() == ISD::SUB &&
2166         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2167       SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2168                                                N0.getOperand(1).getNode());
2169       assert(Sub && "Constant folding failed");
2170       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2171     }
2172 
2173     // fold ((c1-A)+c2) -> (c1+c2)-A
2174     if (N0.getOpcode() == ISD::SUB &&
2175         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2176       SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2177                                                N0.getOperand(0).getNode());
2178       assert(Add && "Constant folding failed");
2179       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2180     }
2181 
2182     // add (sext i1 X), 1 -> zext (not i1 X)
2183     // We don't transform this pattern:
2184     //   add (zext i1 X), -1 -> sext (not i1 X)
2185     // because most (?) targets generate better code for the zext form.
2186     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2187         isOneOrOneSplat(N1)) {
2188       SDValue X = N0.getOperand(0);
2189       if ((!LegalOperations ||
2190            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2191             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2192           X.getScalarValueSizeInBits() == 1) {
2193         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2194         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2195       }
2196     }
2197 
2198     // Undo the add -> or combine to merge constant offsets from a frame index.
2199     if (N0.getOpcode() == ISD::OR &&
2200         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2201         isa<ConstantSDNode>(N0.getOperand(1)) &&
2202         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2203       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2204       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2205     }
2206   }
2207 
2208   if (SDValue NewSel = foldBinOpIntoSelect(N))
2209     return NewSel;
2210 
2211   // reassociate add
2212   if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2213     return RADD;
2214 
2215   // fold ((0-A) + B) -> B-A
2216   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2217     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2218 
2219   // fold (A + (0-B)) -> A-B
2220   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2221     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2222 
2223   // fold (A+(B-A)) -> B
2224   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2225     return N1.getOperand(0);
2226 
2227   // fold ((B-A)+A) -> B
2228   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2229     return N0.getOperand(0);
2230 
2231   // fold ((A-B)+(C-A)) -> (C-B)
2232   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2233       N0.getOperand(0) == N1.getOperand(1))
2234     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2235                        N0.getOperand(1));
2236 
2237   // fold ((A-B)+(B-C)) -> (A-C)
2238   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2239       N0.getOperand(1) == N1.getOperand(0))
2240     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2241                        N1.getOperand(1));
2242 
2243   // fold (A+(B-(A+C))) to (B-C)
2244   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2245       N0 == N1.getOperand(1).getOperand(0))
2246     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2247                        N1.getOperand(1).getOperand(1));
2248 
2249   // fold (A+(B-(C+A))) to (B-C)
2250   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2251       N0 == N1.getOperand(1).getOperand(1))
2252     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2253                        N1.getOperand(1).getOperand(0));
2254 
2255   // fold (A+((B-A)+or-C)) to (B+or-C)
2256   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2257       N1.getOperand(0).getOpcode() == ISD::SUB &&
2258       N0 == N1.getOperand(0).getOperand(1))
2259     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2260                        N1.getOperand(1));
2261 
2262   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2263   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2264     SDValue N00 = N0.getOperand(0);
2265     SDValue N01 = N0.getOperand(1);
2266     SDValue N10 = N1.getOperand(0);
2267     SDValue N11 = N1.getOperand(1);
2268 
2269     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2270       return DAG.getNode(ISD::SUB, DL, VT,
2271                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2272                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2273   }
2274 
2275   // fold (add (umax X, C), -C) --> (usubsat X, C)
2276   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2277     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2278       return (!Max && !Op) ||
2279              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2280     };
2281     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2282                                   /*AllowUndefs*/ true))
2283       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2284                          N0.getOperand(1));
2285   }
2286 
2287   if (SimplifyDemandedBits(SDValue(N, 0)))
2288     return SDValue(N, 0);
2289 
2290   if (isOneOrOneSplat(N1)) {
2291     // fold (add (xor a, -1), 1) -> (sub 0, a)
2292     if (isBitwiseNot(N0))
2293       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2294                          N0.getOperand(0));
2295 
2296     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2297     if (N0.getOpcode() == ISD::ADD ||
2298         N0.getOpcode() == ISD::UADDO ||
2299         N0.getOpcode() == ISD::SADDO) {
2300       SDValue A, Xor;
2301 
2302       if (isBitwiseNot(N0.getOperand(0))) {
2303         A = N0.getOperand(1);
2304         Xor = N0.getOperand(0);
2305       } else if (isBitwiseNot(N0.getOperand(1))) {
2306         A = N0.getOperand(0);
2307         Xor = N0.getOperand(1);
2308       }
2309 
2310       if (Xor)
2311         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2312     }
2313   }
2314 
2315   // (x - y) + -1  ->  add (xor y, -1), x
2316   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2317       isAllOnesOrAllOnesSplat(N1)) {
2318     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2319     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2320   }
2321 
2322   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2323     return Combined;
2324 
2325   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2326     return Combined;
2327 
2328   return SDValue();
2329 }
2330 
2331 SDValue DAGCombiner::visitADD(SDNode *N) {
2332   SDValue N0 = N->getOperand(0);
2333   SDValue N1 = N->getOperand(1);
2334   EVT VT = N0.getValueType();
2335   SDLoc DL(N);
2336 
2337   if (SDValue Combined = visitADDLike(N))
2338     return Combined;
2339 
2340   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2341     return V;
2342 
2343   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2344     return V;
2345 
2346   // fold (a+b) -> (a|b) iff a and b share no bits.
2347   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2348       DAG.haveNoCommonBitsSet(N0, N1))
2349     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2350 
2351   return SDValue();
2352 }
2353 
2354 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2355   unsigned Opcode = N->getOpcode();
2356   SDValue N0 = N->getOperand(0);
2357   SDValue N1 = N->getOperand(1);
2358   EVT VT = N0.getValueType();
2359   SDLoc DL(N);
2360 
2361   // fold vector ops
2362   if (VT.isVector()) {
2363     // TODO SimplifyVBinOp
2364 
2365     // fold (add_sat x, 0) -> x, vector edition
2366     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2367       return N0;
2368     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2369       return N1;
2370   }
2371 
2372   // fold (add_sat x, undef) -> -1
2373   if (N0.isUndef() || N1.isUndef())
2374     return DAG.getAllOnesConstant(DL, VT);
2375 
2376   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2377     // canonicalize constant to RHS
2378     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2379       return DAG.getNode(Opcode, DL, VT, N1, N0);
2380     // fold (add_sat c1, c2) -> c3
2381     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2382                                       N1.getNode());
2383   }
2384 
2385   // fold (add_sat x, 0) -> x
2386   if (isNullConstant(N1))
2387     return N0;
2388 
2389   // If it cannot overflow, transform into an add.
2390   if (Opcode == ISD::UADDSAT)
2391     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2392       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2393 
2394   return SDValue();
2395 }
2396 
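/// If V is (possibly behind TRUNCATE/ZERO_EXTEND/AND-with-1 nodes left by
/// legalization) the carry result of an overflow-producing node, return that
/// carry value; otherwise return a null SDValue. For example (a sketch):
///   (and (trunc (uaddo x, y):1), 1)
/// peels back to the carry (uaddo x, y):1; the mask guarantees a 0/1 value
/// regardless of the target's boolean contents.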
2397 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2398   bool Masked = false;
2399 
2400   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2401   while (true) {
2402     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2403       V = V.getOperand(0);
2404       continue;
2405     }
2406 
2407     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2408       Masked = true;
2409       V = V.getOperand(0);
2410       continue;
2411     }
2412 
2413     break;
2414   }
2415 
2416   // If this is not a carry, return.
2417   if (V.getResNo() != 1)
2418     return SDValue();
2419 
2420   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2421       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2422     return SDValue();
2423 
2424   EVT VT = V.getNode()->getValueType(0);
2425   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2426     return SDValue();
2427 
  // If the result is masked, then no matter what kind of bool it is we can
  // return it. If it isn't, then we need to make sure the bool is represented
  // as either 0 or 1 and not some other value.
2431   if (Masked ||
2432       TLI.getBooleanContents(V.getValueType()) ==
2433           TargetLoweringBase::ZeroOrOneBooleanContent)
2434     return V;
2435 
2436   return SDValue();
2437 }
2438 
2439 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2440 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2441 /// the opcode and bypass the mask operation.
2442 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2443                                  SelectionDAG &DAG, const SDLoc &DL) {
2444   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2445     return SDValue();
2446 
2447   EVT VT = N0.getValueType();
2448   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2449     return SDValue();
2450 
2451   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2452   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2453   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2454 }
2455 
2456 /// Helper for doing combines based on N0 and N1 being added to each other.
2457 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
2459   EVT VT = N0.getValueType();
2460   SDLoc DL(LocReference);
2461 
2462   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2463   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2464       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2465     return DAG.getNode(ISD::SUB, DL, VT, N0,
2466                        DAG.getNode(ISD::SHL, DL, VT,
2467                                    N1.getOperand(0).getOperand(1),
2468                                    N1.getOperand(1)));
2469 
2470   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2471     return V;
2472 
2473   // Hoist one-use subtraction by non-opaque constant:
2474   //   (x - C) + y  ->  (x + y) - C
2475   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2476   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2477       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2478     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2479     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2480   }
2481 
2482   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2483   // rather than 'add 0/-1' (the zext should get folded).
2484   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2485   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2486       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2487       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2488     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2489     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2490   }
2491 
2492   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2493   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2494     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2495     if (TN->getVT() == MVT::i1) {
2496       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2497                                  DAG.getConstant(1, DL, VT));
2498       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2499     }
2500   }
2501 
2502   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2503   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2504       N1.getResNo() == 0)
2505     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2506                        N0, N1.getOperand(0), N1.getOperand(2));
2507 
2508   // (add X, Carry) -> (addcarry X, 0, Carry)
2509   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2510     if (SDValue Carry = getAsCarry(TLI, N1))
2511       return DAG.getNode(ISD::ADDCARRY, DL,
2512                          DAG.getVTList(VT, Carry.getValueType()), N0,
2513                          DAG.getConstant(0, DL, VT), Carry);
2514 
2515   return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitADDC(SDNode *N) {
2519   SDValue N0 = N->getOperand(0);
2520   SDValue N1 = N->getOperand(1);
2521   EVT VT = N0.getValueType();
2522   SDLoc DL(N);
2523 
2524   // If the flag result is dead, turn this into an ADD.
2525   if (!N->hasAnyUseOfValue(1))
2526     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2527                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2528 
2529   // canonicalize constant to RHS.
2530   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2531   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2532   if (N0C && !N1C)
2533     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2534 
2535   // fold (addc x, 0) -> x + no carry out
2536   if (isNullConstant(N1))
2537     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2538                                         DL, MVT::Glue));
2539 
2540   // If it cannot overflow, transform into an add.
2541   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2542     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2543                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2544 
2545   return SDValue();
2546 }
2547 
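/// Return the logical negation of the boolean V in the target's preferred
/// representation, e.g. (a sketch): (xor V, 1) under ZeroOrOneBooleanContent
/// and (xor V, -1) under ZeroOrNegativeOneBooleanContent.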
2548 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2549                            SelectionDAG &DAG, const TargetLowering &TLI) {
2550   EVT VT = V.getValueType();
2551 
2552   SDValue Cst;
2553   switch (TLI.getBooleanContents(VT)) {
2554   case TargetLowering::ZeroOrOneBooleanContent:
2555   case TargetLowering::UndefinedBooleanContent:
2556     Cst = DAG.getConstant(1, DL, VT);
2557     break;
2558   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2559     Cst = DAG.getConstant(-1, DL, VT);
2560     break;
2561   }
2562 
2563   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2564 }
2565 
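/// If V is a flipped boolean, i.e. (xor B, C) where the constant C negates a
/// boolean in the target's representation, return the unflipped value B;
/// otherwise return a null SDValue.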
2566 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2567   if (V.getOpcode() != ISD::XOR)
2568     return SDValue();
2569 
2570   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2571   if (!Const)
2572     return SDValue();
2573 
2574   EVT VT = V.getValueType();
2575 
2576   bool IsFlip = false;
2577   switch(TLI.getBooleanContents(VT)) {
2578     case TargetLowering::ZeroOrOneBooleanContent:
2579       IsFlip = Const->isOne();
2580       break;
2581     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2582       IsFlip = Const->isAllOnesValue();
2583       break;
2584     case TargetLowering::UndefinedBooleanContent:
2585       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2586       break;
2587   }
2588 
2589   if (IsFlip)
2590     return V.getOperand(0);
2591   return SDValue();
2592 }
2593 
2594 SDValue DAGCombiner::visitADDO(SDNode *N) {
2595   SDValue N0 = N->getOperand(0);
2596   SDValue N1 = N->getOperand(1);
2597   EVT VT = N0.getValueType();
2598   bool IsSigned = (ISD::SADDO == N->getOpcode());
2599 
2600   EVT CarryVT = N->getValueType(1);
2601   SDLoc DL(N);
2602 
2603   // If the flag result is dead, turn this into an ADD.
2604   if (!N->hasAnyUseOfValue(1))
2605     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2606                      DAG.getUNDEF(CarryVT));
2607 
2608   // canonicalize constant to RHS.
2609   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2610       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2611     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2612 
2613   // fold (addo x, 0) -> x + no carry out
2614   if (isNullOrNullSplat(N1))
2615     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2616 
2617   if (!IsSigned) {
2618     // If it cannot overflow, transform into an add.
2619     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2620       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2621                        DAG.getConstant(0, DL, CarryVT));
2622 
2623     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2624     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2625       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2626                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2627       return CombineTo(N, Sub,
2628                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2629     }
2630 
2631     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2632       return Combined;
2633 
2634     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2635       return Combined;
2636   }
2637 
2638   return SDValue();
2639 }
2640 
2641 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2642   EVT VT = N0.getValueType();
2643   if (VT.isVector())
2644     return SDValue();
2645 
2646   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2647   // If Y + 1 cannot overflow.
2648   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2649     SDValue Y = N1.getOperand(0);
2650     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2651     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2652       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2653                          N1.getOperand(2));
2654   }
2655 
2656   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2657   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2658     if (SDValue Carry = getAsCarry(TLI, N1))
2659       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2660                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2661 
2662   return SDValue();
2663 }
2664 
2665 SDValue DAGCombiner::visitADDE(SDNode *N) {
2666   SDValue N0 = N->getOperand(0);
2667   SDValue N1 = N->getOperand(1);
2668   SDValue CarryIn = N->getOperand(2);
2669 
2670   // canonicalize constant to RHS
2671   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2672   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2673   if (N0C && !N1C)
2674     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2675                        N1, N0, CarryIn);
2676 
2677   // fold (adde x, y, false) -> (addc x, y)
2678   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2679     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2680 
2681   return SDValue();
2682 }
2683 
2684 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2685   SDValue N0 = N->getOperand(0);
2686   SDValue N1 = N->getOperand(1);
2687   SDValue CarryIn = N->getOperand(2);
2688   SDLoc DL(N);
2689 
2690   // canonicalize constant to RHS
2691   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2692   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2693   if (N0C && !N1C)
2694     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2695 
2696   // fold (addcarry x, y, false) -> (uaddo x, y)
2697   if (isNullConstant(CarryIn)) {
2698     if (!LegalOperations ||
2699         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2700       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2701   }
2702 
2703   EVT CarryVT = CarryIn.getValueType();
2704 
2705   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
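  // 0 + 0 + CarryIn cannot overflow, so the result is just the carry-in
  // value normalized to 0/1 in VT, with a known-zero carry-out.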
2706   if (isNullConstant(N0) && isNullConstant(N1)) {
2707     EVT VT = N0.getValueType();
2708     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2709     AddToWorklist(CarryExt.getNode());
2710     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2711                                     DAG.getConstant(1, DL, VT)),
2712                      DAG.getConstant(0, DL, CarryVT));
2713   }
2714 
2715   // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
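  // Note: ~a + !b == (2^N - 1 - a) + (1 - b) == 2^N - (a + b), which matches
  // 0 - a - b, with the carry-out being the inverse of the sub's borrow.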
2716   if (isBitwiseNot(N0) && isNullConstant(N1)) {
2717     if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2718       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2719                                 DAG.getConstant(0, DL, N0.getValueType()),
2720                                 N0.getOperand(0), B);
2721       return CombineTo(N, Sub,
2722                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2723     }
2724   }
2725 
2726   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2727     return Combined;
2728 
2729   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2730     return Combined;
2731 
2732   return SDValue();
2733 }
2734 
2735 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2736                                        SDNode *N) {
2737   // Iff the flag result is dead:
2738   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2739   if ((N0.getOpcode() == ISD::ADD ||
2740        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2741       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2742     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2743                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2744 
2745   /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation, in which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
2749    *
2750    * We are trying to get:
2751    *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2752    */
2753   if (auto Y = getAsCarry(TLI, N1)) {
2754     /**
2755      *            (uaddo A, B)
2756      *             /       \
2757      *          Carry      Sum
2758      *            |          \
2759      *            | (addcarry *, 0, Z)
2760      *            |       /
2761      *             \   Carry
2762      *              |   /
2763      * (addcarry X, *, *)
2764      */
2765     if (Y.getOpcode() == ISD::UADDO &&
2766         CarryIn.getResNo() == 1 &&
2767         CarryIn.getOpcode() == ISD::ADDCARRY &&
2768         isNullConstant(CarryIn.getOperand(1)) &&
2769         CarryIn.getOperand(0) == Y.getValue(0)) {
2770       auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2771                               Y.getOperand(0), Y.getOperand(1),
2772                               CarryIn.getOperand(2));
2773       AddToWorklist(NewY.getNode());
2774       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2775                          DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2776                          NewY.getValue(1));
2777     }
2778   }
2779 
2780   return SDValue();
2781 }
2782 
// Folding to zero may not be valid for all vector types: materializing the
// zero might require an illegal BUILD_VECTOR, so check that we can before
// folding.
2785 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2786                              SelectionDAG &DAG, bool LegalOperations) {
2787   if (!VT.isVector())
2788     return DAG.getConstant(0, DL, VT);
2789   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2790     return DAG.getConstant(0, DL, VT);
2791   return SDValue();
2792 }
2793 
2794 SDValue DAGCombiner::visitSUB(SDNode *N) {
2795   SDValue N0 = N->getOperand(0);
2796   SDValue N1 = N->getOperand(1);
2797   EVT VT = N0.getValueType();
2798   SDLoc DL(N);
2799 
2800   // fold vector ops
2801   if (VT.isVector()) {
2802     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2803       return FoldedVOp;
2804 
2805     // fold (sub x, 0) -> x, vector edition
2806     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2807       return N0;
2808   }
2809 
2810   // fold (sub x, x) -> 0
2811   // FIXME: Refactor this and xor and other similar operations together.
2812   if (N0 == N1)
2813     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2814   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2815       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2816     // fold (sub c1, c2) -> c1-c2
2817     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2818                                       N1.getNode());
2819   }
2820 
2821   if (SDValue NewSel = foldBinOpIntoSelect(N))
2822     return NewSel;
2823 
2824   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2825 
2826   // fold (sub x, c) -> (add x, -c)
2827   if (N1C) {
2828     return DAG.getNode(ISD::ADD, DL, VT, N0,
2829                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2830   }
2831 
2832   if (isNullOrNullSplat(N0)) {
2833     unsigned BitWidth = VT.getScalarSizeInBits();
2834     // Right-shifting everything out but the sign bit followed by negation is
2835     // the same as flipping arithmetic/logical shift type without the negation:
2836     // -(X >>u 31) -> (X >>s 31)
2837     // -(X >>s 31) -> (X >>u 31)
2838     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2839       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2840       if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2841         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2842         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2843           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2844       }
2845     }
2846 
2847     // 0 - X --> 0 if the sub is NUW.
2848     if (N->getFlags().hasNoUnsignedWrap())
2849       return N0;
2850 
2851     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2852       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2853       // N1 must be 0 because negating the minimum signed value is undefined.
2854       if (N->getFlags().hasNoSignedWrap())
2855         return N0;
2856 
2857       // 0 - X --> X if X is 0 or the minimum signed value.
2858       return N1;
2859     }
2860   }
2861 
2862   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2863   if (isAllOnesOrAllOnesSplat(N0))
2864     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2865 
2866   // fold (A - (0-B)) -> A+B
2867   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2868     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2869 
2870   // fold A-(A-B) -> B
2871   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2872     return N1.getOperand(1);
2873 
2874   // fold (A+B)-A -> B
2875   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2876     return N0.getOperand(1);
2877 
2878   // fold (A+B)-B -> A
2879   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2880     return N0.getOperand(0);
2881 
2882   // fold (A+C1)-C2 -> A+(C1-C2)
2883   if (N0.getOpcode() == ISD::ADD &&
2884       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2885       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2886     SDValue NewC = DAG.FoldConstantArithmetic(
2887         ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2888     assert(NewC && "Constant folding failed");
2889     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
2890   }
2891 
2892   // fold C2-(A+C1) -> (C2-C1)-A
2893   if (N1.getOpcode() == ISD::ADD) {
2894     SDValue N11 = N1.getOperand(1);
2895     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2896         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2897       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2898                                                 N11.getNode());
2899       assert(NewC && "Constant folding failed");
2900       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2901     }
2902   }
2903 
2904   // fold (A-C1)-C2 -> A-(C1+C2)
2905   if (N0.getOpcode() == ISD::SUB &&
2906       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2907       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2908     SDValue NewC = DAG.FoldConstantArithmetic(
2909         ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2910     assert(NewC && "Constant folding failed");
2911     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
2912   }
2913 
2914   // fold (c1-A)-c2 -> (c1-c2)-A
2915   if (N0.getOpcode() == ISD::SUB &&
2916       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2917       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
2918     SDValue NewC = DAG.FoldConstantArithmetic(
2919         ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
2920     assert(NewC && "Constant folding failed");
2921     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
2922   }
2923 
2924   // fold ((A+(B+or-C))-B) -> A+or-C
2925   if (N0.getOpcode() == ISD::ADD &&
2926       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2927        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2928       N0.getOperand(1).getOperand(0) == N1)
2929     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2930                        N0.getOperand(1).getOperand(1));
2931 
2932   // fold ((A+(C+B))-B) -> A+C
2933   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2934       N0.getOperand(1).getOperand(1) == N1)
2935     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2936                        N0.getOperand(1).getOperand(0));
2937 
2938   // fold ((A-(B-C))-C) -> A-B
2939   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2940       N0.getOperand(1).getOperand(1) == N1)
2941     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2942                        N0.getOperand(1).getOperand(0));
2943 
2944   // fold (A-(B-C)) -> A+(C-B)
2945   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2946     return DAG.getNode(ISD::ADD, DL, VT, N0,
2947                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2948                                    N1.getOperand(0)));
2949 
  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  // fold (X - (Y * -Z)) -> (X + (Y * Z))
2951   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2952     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2953         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2954       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2955                                 N1.getOperand(0).getOperand(1),
2956                                 N1.getOperand(1));
2957       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2958     }
2959     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2960         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2961       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2962                                 N1.getOperand(0),
2963                                 N1.getOperand(1).getOperand(1));
2964       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2965     }
2966   }
2967 
2968   // If either operand of a sub is undef, the result is undef
2969   if (N0.isUndef())
2970     return N0;
2971   if (N1.isUndef())
2972     return N1;
2973 
2974   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2975     return V;
2976 
2977   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2978     return V;
2979 
2980   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2981     return V;
2982 
2983   // (x - y) - 1  ->  add (xor y, -1), x
2984   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
2985     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
2986                               DAG.getAllOnesConstant(DL, VT));
2987     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2988   }
2989 
2990   // Hoist one-use addition by non-opaque constant:
2991   //   (x + C) - y  ->  (x - y) + C
2992   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
2993       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2994     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
2995     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
2996   }
2997   // y - (x + C)  ->  (y - x) - C
2998   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
2999       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3000     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3001     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3002   }
3003   // (x - C) - y  ->  (x - y) - C
3004   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3005   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3006       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3007     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3008     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3009   }
3010 
3011   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3012   // rather than 'sub 0/1' (the sext should get folded).
3013   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3014   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3015       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3016       TLI.getBooleanContents(VT) ==
3017           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3018     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3019     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3020   }
3021 
3022   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
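  // With Y = sra(X, size(X)-1) being all sign bits, (xor X, Y) - Y is the
  // standard branchless abs idiom: a no-op when X >= 0, and ~X + 1 == -X
  // when X < 0.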
3023   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3024     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3025       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3026       SDValue S0 = N1.getOperand(0);
3027       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3028         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3029         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3030           if (C->getAPIntValue() == (OpSizeInBits - 1))
3031             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3032       }
3033     }
3034   }
3035 
3036   // If the relocation model supports it, consider symbol offsets.
3037   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3038     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3039       // fold (sub Sym, c) -> Sym-c
3040       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3041         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3042                                     GA->getOffset() -
3043                                         (uint64_t)N1C->getSExtValue());
3044       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3045       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3046         if (GA->getGlobal() == GB->getGlobal())
3047           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3048                                  DL, VT);
3049     }
3050 
3051   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
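  // (sextinreg Y i1) is 0 or -1, so subtracting it is the same as adding
  // the low bit of Y.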
3052   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3053     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3054     if (TN->getVT() == MVT::i1) {
3055       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3056                                  DAG.getConstant(1, DL, VT));
3057       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3058     }
3059   }
3060 
3061   // Prefer an add for more folding potential and possibly better codegen:
3062   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3063   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3064     SDValue ShAmt = N1.getOperand(1);
3065     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3066     if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
3067       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3068       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3069     }
3070   }
3071 
3072   return SDValue();
3073 }
3074 
3075 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3076   SDValue N0 = N->getOperand(0);
3077   SDValue N1 = N->getOperand(1);
3078   EVT VT = N0.getValueType();
3079   SDLoc DL(N);
3080 
3081   // fold vector ops
3082   if (VT.isVector()) {
3083     // TODO SimplifyVBinOp
3084 
3085     // fold (sub_sat x, 0) -> x, vector edition
3086     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3087       return N0;
3088   }
3089 
3090   // fold (sub_sat x, undef) -> 0
3091   if (N0.isUndef() || N1.isUndef())
3092     return DAG.getConstant(0, DL, VT);
3093 
3094   // fold (sub_sat x, x) -> 0
3095   if (N0 == N1)
3096     return DAG.getConstant(0, DL, VT);
3097 
3098   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3099       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3100     // fold (sub_sat c1, c2) -> c3
3101     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3102                                       N1.getNode());
3103   }
3104 
3105   // fold (sub_sat x, 0) -> x
3106   if (isNullConstant(N1))
3107     return N0;
3108 
3109   return SDValue();
3110 }
3111 
3112 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3113   SDValue N0 = N->getOperand(0);
3114   SDValue N1 = N->getOperand(1);
3115   EVT VT = N0.getValueType();
3116   SDLoc DL(N);
3117 
  // If the flag result is dead, turn this into a SUB.
3119   if (!N->hasAnyUseOfValue(1))
3120     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3121                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3122 
3123   // fold (subc x, x) -> 0 + no borrow
3124   if (N0 == N1)
3125     return CombineTo(N, DAG.getConstant(0, DL, VT),
3126                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3127 
3128   // fold (subc x, 0) -> x + no borrow
3129   if (isNullConstant(N1))
3130     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3131 
3132   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3133   if (isAllOnesConstant(N0))
3134     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3135                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3136 
3137   return SDValue();
3138 }
3139 
3140 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3141   SDValue N0 = N->getOperand(0);
3142   SDValue N1 = N->getOperand(1);
3143   EVT VT = N0.getValueType();
3144   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3145 
3146   EVT CarryVT = N->getValueType(1);
3147   SDLoc DL(N);
3148 
  // If the flag result is dead, turn this into a SUB.
3150   if (!N->hasAnyUseOfValue(1))
3151     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3152                      DAG.getUNDEF(CarryVT));
3153 
3154   // fold (subo x, x) -> 0 + no borrow
3155   if (N0 == N1)
3156     return CombineTo(N, DAG.getConstant(0, DL, VT),
3157                      DAG.getConstant(0, DL, CarryVT));
3158 
3159   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3160 
  // fold (ssubo x, c) -> (saddo x, -c)
3162   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3163     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3164                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3165   }
3166 
3167   // fold (subo x, 0) -> x + no borrow
3168   if (isNullOrNullSplat(N1))
3169     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3170 
3171   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3172   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3173     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3174                      DAG.getConstant(0, DL, CarryVT));
3175 
3176   return SDValue();
3177 }
3178 
3179 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3180   SDValue N0 = N->getOperand(0);
3181   SDValue N1 = N->getOperand(1);
3182   SDValue CarryIn = N->getOperand(2);
3183 
3184   // fold (sube x, y, false) -> (subc x, y)
3185   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3186     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3187 
3188   return SDValue();
3189 }
3190 
3191 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3192   SDValue N0 = N->getOperand(0);
3193   SDValue N1 = N->getOperand(1);
3194   SDValue CarryIn = N->getOperand(2);
3195 
3196   // fold (subcarry x, y, false) -> (usubo x, y)
3197   if (isNullConstant(CarryIn)) {
3198     if (!LegalOperations ||
3199         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3200       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3201   }
3202 
3203   return SDValue();
3204 }
3205 
3206 SDValue DAGCombiner::visitMUL(SDNode *N) {
3207   SDValue N0 = N->getOperand(0);
3208   SDValue N1 = N->getOperand(1);
3209   EVT VT = N0.getValueType();
3210 
3211   // fold (mul x, undef) -> 0
3212   if (N0.isUndef() || N1.isUndef())
3213     return DAG.getConstant(0, SDLoc(N), VT);
3214 
3215   bool N0IsConst = false;
3216   bool N1IsConst = false;
3217   bool N1IsOpaqueConst = false;
3218   bool N0IsOpaqueConst = false;
3219   APInt ConstValue0, ConstValue1;
3220   // fold vector ops
3221   if (VT.isVector()) {
3222     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3223       return FoldedVOp;
3224 
3225     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3226     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3227     assert((!N0IsConst ||
3228             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3229            "Splat APInt should be element width");
3230     assert((!N1IsConst ||
3231             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3232            "Splat APInt should be element width");
3233   } else {
3234     N0IsConst = isa<ConstantSDNode>(N0);
3235     if (N0IsConst) {
3236       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3237       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3238     }
3239     N1IsConst = isa<ConstantSDNode>(N1);
3240     if (N1IsConst) {
3241       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3242       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3243     }
3244   }
3245 
3246   // fold (mul c1, c2) -> c1*c2
3247   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3248     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3249                                       N0.getNode(), N1.getNode());
3250 
3251   // canonicalize constant to RHS (vector doesn't have to splat)
3252   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3253      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3254     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3255   // fold (mul x, 0) -> 0
3256   if (N1IsConst && ConstValue1.isNullValue())
3257     return N1;
3258   // fold (mul x, 1) -> x
3259   if (N1IsConst && ConstValue1.isOneValue())
3260     return N0;
3261 
3262   if (SDValue NewSel = foldBinOpIntoSelect(N))
3263     return NewSel;
3264 
3265   // fold (mul x, -1) -> 0-x
3266   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3267     SDLoc DL(N);
3268     return DAG.getNode(ISD::SUB, DL, VT,
3269                        DAG.getConstant(0, DL, VT), N0);
3270   }
3271   // fold (mul x, (1 << c)) -> x << c
3272   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3273       DAG.isKnownToBeAPowerOfTwo(N1) &&
3274       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3275     SDLoc DL(N);
3276     SDValue LogBase2 = BuildLogBase2(N1, DL);
3277     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3278     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3279     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3280   }
3281   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3282   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3283     unsigned Log2Val = (-ConstValue1).logBase2();
3284     SDLoc DL(N);
3285     // FIXME: If the input is something that is easily negated (e.g. a
3286     // single-use add), we should put the negate there.
3287     return DAG.getNode(ISD::SUB, DL, VT,
3288                        DAG.getConstant(0, DL, VT),
3289                        DAG.getNode(ISD::SHL, DL, VT, N0,
3290                             DAG.getConstant(Log2Val, DL,
3291                                       getShiftAmountTy(N0.getValueType()))));
3292   }
3293 
3294   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3295   // mul x, (2^N + 1) --> add (shl x, N), x
3296   // mul x, (2^N - 1) --> sub (shl x, N), x
3297   // Examples: x * 33 --> (x << 5) + x
3298   //           x * 15 --> (x << 4) - x
3299   //           x * -33 --> -((x << 5) + x)
3300   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3301   if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3302     // TODO: We could handle more general decomposition of any constant by
3303     //       having the target set a limit on number of ops and making a
3304     //       callback to determine that sequence (similar to sqrt expansion).
3305     unsigned MathOp = ISD::DELETED_NODE;
3306     APInt MulC = ConstValue1.abs();
3307     if ((MulC - 1).isPowerOf2())
3308       MathOp = ISD::ADD;
3309     else if ((MulC + 1).isPowerOf2())
3310       MathOp = ISD::SUB;
3311 
3312     if (MathOp != ISD::DELETED_NODE) {
3313       unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3314                                           : (MulC + 1).logBase2();
3315       assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3316              "Not expecting multiply-by-constant that could have simplified");
3317       SDLoc DL(N);
3318       SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3319                                 DAG.getConstant(ShAmt, DL, VT));
3320       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3321       if (ConstValue1.isNegative())
3322         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3323       return R;
3324     }
3325   }
3326 
3327   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3328   if (N0.getOpcode() == ISD::SHL &&
3329       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3330       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3331     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3332     if (isConstantOrConstantVector(C3))
3333       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3334   }
3335 
3336   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3337   // use.
3338   {
3339     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3340 
3341     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3342     if (N0.getOpcode() == ISD::SHL &&
3343         isConstantOrConstantVector(N0.getOperand(1)) &&
3344         N0.getNode()->hasOneUse()) {
3345       Sh = N0; Y = N1;
3346     } else if (N1.getOpcode() == ISD::SHL &&
3347                isConstantOrConstantVector(N1.getOperand(1)) &&
3348                N1.getNode()->hasOneUse()) {
3349       Sh = N1; Y = N0;
3350     }
3351 
3352     if (Sh.getNode()) {
3353       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3354       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3355     }
3356   }
3357 
3358   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3359   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3360       N0.getOpcode() == ISD::ADD &&
3361       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3362       isMulAddWithConstProfitable(N, N0, N1))
3363       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3364                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3365                                      N0.getOperand(0), N1),
3366                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3367                                      N0.getOperand(1), N1));
3368 
3369   // reassociate mul
3370   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3371     return RMUL;
3372 
3373   return SDValue();
3374 }
3375 
3376 /// Return true if divmod libcall is available.
3377 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3378                                      const TargetLowering &TLI) {
3379   RTLIB::Libcall LC;
3380   EVT NodeType = Node->getValueType(0);
3381   if (!NodeType.isSimple())
3382     return false;
3383   switch (NodeType.getSimpleVT().SimpleTy) {
3384   default: return false; // No libcall for vector types.
3385   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3386   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3387   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3388   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3389   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3390   }
3391 
3392   return TLI.getLibcallName(LC) != nullptr;
3393 }
3394 
3395 /// Issue divrem if both quotient and remainder are needed.
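/// If other users of the same operands compute the matching div, rem, or
/// divrem, rewrite them all to share a single DIVREM node.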
3396 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3397   if (Node->use_empty())
3398     return SDValue(); // This is a dead node, leave it alone.
3399 
3400   unsigned Opcode = Node->getOpcode();
3401   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3402   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3403 
  // DIVREM lib calls can still work on non-legal types if the target marks
  // the operation as custom.
3405   EVT VT = Node->getValueType(0);
3406   if (VT.isVector() || !VT.isInteger())
3407     return SDValue();
3408 
3409   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3410     return SDValue();
3411 
3412   // If DIVREM is going to get expanded into a libcall,
3413   // but there is no libcall available, then don't combine.
3414   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3415       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3416     return SDValue();
3417 
3418   // If div is legal, it's better to do the normal expansion
3419   unsigned OtherOpcode = 0;
3420   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3421     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3422     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3423       return SDValue();
3424   } else {
3425     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3426     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3427       return SDValue();
3428   }
3429 
3430   SDValue Op0 = Node->getOperand(0);
3431   SDValue Op1 = Node->getOperand(1);
3432   SDValue combined;
3433   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3434          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3435     SDNode *User = *UI;
3436     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3437         User->use_empty())
3438       continue;
3439     // Convert the other matching node(s), too;
3440     // otherwise, the DIVREM may get target-legalized into something
3441     // target-specific that we won't be able to recognize.
3442     unsigned UserOpc = User->getOpcode();
3443     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3444         User->getOperand(0) == Op0 &&
3445         User->getOperand(1) == Op1) {
3446       if (!combined) {
3447         if (UserOpc == OtherOpcode) {
3448           SDVTList VTs = DAG.getVTList(VT, VT);
3449           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3450         } else if (UserOpc == DivRemOpc) {
3451           combined = SDValue(User, 0);
3452         } else {
3453           assert(UserOpc == Opcode);
3454           continue;
3455         }
3456       }
3457       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3458         CombineTo(User, combined);
3459       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3460         CombineTo(User, combined.getValue(1));
3461     }
3462   }
3463   return combined;
3464 }
3465 
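// Fold away divisions and remainders with trivial operands: undef or zero
// operands, X op X, and division by one. Shared by the SDIV, UDIV, SREM and
// UREM visitors.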
3466 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3467   SDValue N0 = N->getOperand(0);
3468   SDValue N1 = N->getOperand(1);
3469   EVT VT = N->getValueType(0);
3470   SDLoc DL(N);
3471 
3472   unsigned Opc = N->getOpcode();
3473   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3474   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3475 
3476   // X / undef -> undef
3477   // X % undef -> undef
3478   // X / 0 -> undef
3479   // X % 0 -> undef
3480   // NOTE: This includes vectors where any divisor element is zero/undef.
3481   if (DAG.isUndef(Opc, {N0, N1}))
3482     return DAG.getUNDEF(VT);
3483 
3484   // undef / X -> 0
3485   // undef % X -> 0
3486   if (N0.isUndef())
3487     return DAG.getConstant(0, DL, VT);
3488 
3489   // 0 / X -> 0
3490   // 0 % X -> 0
3491   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3492   if (N0C && N0C->isNullValue())
3493     return N0;
3494 
3495   // X / X -> 1
3496   // X % X -> 0
3497   if (N0 == N1)
3498     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3499 
3500   // X / 1 -> X
3501   // X % 1 -> 0
3502   // If this is a boolean op (single-bit element type), we can't have
3503   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3504   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3505   // it's a 1.
3506   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3507     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3508 
3509   return SDValue();
3510 }
3511 
3512 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3513   SDValue N0 = N->getOperand(0);
3514   SDValue N1 = N->getOperand(1);
3515   EVT VT = N->getValueType(0);
3516   EVT CCVT = getSetCCResultType(VT);
3517 
3518   // fold vector ops
3519   if (VT.isVector())
3520     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3521       return FoldedVOp;
3522 
3523   SDLoc DL(N);
3524 
3525   // fold (sdiv c1, c2) -> c1/c2
3526   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3527   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3528   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3529     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3530   // fold (sdiv X, -1) -> 0-X
3531   if (N1C && N1C->isAllOnesValue())
3532     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3533   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
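  // Every dividend other than MIN_SIGNED has strictly smaller magnitude than
  // MIN_SIGNED, so the quotient truncates to 0; only X == MIN_SIGNED gives 1.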
3534   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3535     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3536                          DAG.getConstant(1, DL, VT),
3537                          DAG.getConstant(0, DL, VT));
3538 
3539   if (SDValue V = simplifyDivRem(N, DAG))
3540     return V;
3541 
3542   if (SDValue NewSel = foldBinOpIntoSelect(N))
3543     return NewSel;
3544 
3545   // If we know the sign bits of both operands are zero, strength reduce to a
3546   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3547   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3548     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3549 
3550   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3551     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
3553     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3554                                               { N0, N1 })) {
3555       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3556       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3557       AddToWorklist(Mul.getNode());
3558       AddToWorklist(Sub.getNode());
3559       CombineTo(RemNode, Sub);
3560     }
3561     return V;
3562   }
3563 
3564   // sdiv, srem -> sdivrem
3565   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3566   // true.  Otherwise, we break the simplification logic in visitREM().
3567   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3568   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3569     if (SDValue DivRem = useDivRem(N))
      return DivRem;
3571 
3572   return SDValue();
3573 }
3574 
3575 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3576   SDLoc DL(N);
3577   EVT VT = N->getValueType(0);
3578   EVT CCVT = getSetCCResultType(VT);
3579   unsigned BitWidth = VT.getScalarSizeInBits();
3580 
  // Helper for determining whether a value is a power-of-2 (or negated
  // power-of-2) constant scalar or a vector of such elements.
3583   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3584     if (C->isNullValue() || C->isOpaque())
3585       return false;
3586     if (C->getAPIntValue().isPowerOf2())
3587       return true;
3588     if ((-C->getAPIntValue()).isPowerOf2())
3589       return true;
3590     return false;
3591   };
3592 
3593   // fold (sdiv X, pow2) -> simple ops after legalize
3594   // FIXME: We check for the exact bit here because the generic lowering gives
3595   // better results in that case. The target-specific lowering should learn how
3596   // to handle exact sdivs efficiently.
3597   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3598     // Target-specific implementation of sdiv x, pow2.
3599     if (SDValue Res = BuildSDIVPow2(N))
3600       return Res;
3601 
3602     // Create constants that are functions of the shift amount value.
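    // A worked sketch for i32 and N1 == 8: C1 = cttz(8) = 3 and
    // Inexact = 32 - 3 = 29, so the core sequence below computes
    // (N0 + ((N0 >>s 31) >>u 29)) >>s 3, rounding toward zero.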
3603     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3604     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3605     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3606     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3607     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3608     if (!isConstantOrConstantVector(Inexact))
3609       return SDValue();
3610 
3611     // Splat the sign bit into the register
3612     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3613                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3614     AddToWorklist(Sign.getNode());
3615 
    // Add (N0 < 0) ? |N1| - 1 : 0; this biases a negative dividend so the
    // final arithmetic shift rounds toward zero.
3617     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3618     AddToWorklist(Srl.getNode());
3619     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3620     AddToWorklist(Add.getNode());
3621     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3622     AddToWorklist(Sra.getNode());
3623 
3624     // Special case: (sdiv X, 1) -> X
    // Special case: (sdiv X, -1) -> 0-X
3626     SDValue One = DAG.getConstant(1, DL, VT);
3627     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3628     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3629     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3630     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3631     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3632 
3633     // If dividing by a positive value, we're done. Otherwise, the result must
3634     // be negated.
3635     SDValue Zero = DAG.getConstant(0, DL, VT);
3636     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3637 
3638     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3639     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3640     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3641     return Res;
3642   }
3643 
3644   // If integer divide is expensive and we satisfy the requirements, emit an
3645   // alternate sequence.  Targets may check function attributes for size/speed
3646   // trade-offs.
3647   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3648   if (isConstantOrConstantVector(N1) &&
3649       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3650     if (SDValue Op = BuildSDIV(N))
3651       return Op;
3652 
3653   return SDValue();
3654 }
3655 
3656 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3657   SDValue N0 = N->getOperand(0);
3658   SDValue N1 = N->getOperand(1);
3659   EVT VT = N->getValueType(0);
3660   EVT CCVT = getSetCCResultType(VT);
3661 
3662   // fold vector ops
3663   if (VT.isVector())
3664     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3665       return FoldedVOp;
3666 
3667   SDLoc DL(N);
3668 
3669   // fold (udiv c1, c2) -> c1/c2
3670   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3671   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3672   if (N0C && N1C)
3673     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3674                                                     N0C, N1C))
3675       return Folded;
3676   // fold (udiv X, -1) -> select(X == -1, 1, 0)
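  // In unsigned arithmetic, -1 is the maximum value, so the quotient is 1
  // exactly when X == -1 and 0 otherwise.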
3677   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3678     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3679                          DAG.getConstant(1, DL, VT),
3680                          DAG.getConstant(0, DL, VT));
3681 
3682   if (SDValue V = simplifyDivRem(N, DAG))
3683     return V;
3684 
3685   if (SDValue NewSel = foldBinOpIntoSelect(N))
3686     return NewSel;
3687 
3688   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3689     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
3691     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3692                                               { N0, N1 })) {
3693       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3694       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3695       AddToWorklist(Mul.getNode());
3696       AddToWorklist(Sub.getNode());
3697       CombineTo(RemNode, Sub);
3698     }
3699     return V;
3700   }
3701 
  // udiv, urem -> udivrem
3703   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3704   // true.  Otherwise, we break the simplification logic in visitREM().
3705   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3706   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3707     if (SDValue DivRem = useDivRem(N))
      return DivRem;
3709 
3710   return SDValue();
3711 }
3712 
3713 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3714   SDLoc DL(N);
3715   EVT VT = N->getValueType(0);
3716 
3717   // fold (udiv x, (1 << c)) -> x >>u c
3718   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3719       DAG.isKnownToBeAPowerOfTwo(N1)) {
3720     SDValue LogBase2 = BuildLogBase2(N1, DL);
3721     AddToWorklist(LogBase2.getNode());
3722 
3723     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3724     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3725     AddToWorklist(Trunc.getNode());
3726     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3727   }
3728 
3729   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3730   if (N1.getOpcode() == ISD::SHL) {
3731     SDValue N10 = N1.getOperand(0);
3732     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3733         DAG.isKnownToBeAPowerOfTwo(N10)) {
3734       SDValue LogBase2 = BuildLogBase2(N10, DL);
3735       AddToWorklist(LogBase2.getNode());
3736 
3737       EVT ADDVT = N1.getOperand(1).getValueType();
3738       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3739       AddToWorklist(Trunc.getNode());
3740       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3741       AddToWorklist(Add.getNode());
3742       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3743     }
3744   }
3745 
3746   // fold (udiv x, c) -> alternate
3747   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3748   if (isConstantOrConstantVector(N1) &&
3749       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3750     if (SDValue Op = BuildUDIV(N))
3751       return Op;
3752 
3753   return SDValue();
3754 }
3755 
3756 // handles ISD::SREM and ISD::UREM
3757 SDValue DAGCombiner::visitREM(SDNode *N) {
3758   unsigned Opcode = N->getOpcode();
3759   SDValue N0 = N->getOperand(0);
3760   SDValue N1 = N->getOperand(1);
3761   EVT VT = N->getValueType(0);
3762   EVT CCVT = getSetCCResultType(VT);
3763 
3764   bool isSigned = (Opcode == ISD::SREM);
3765   SDLoc DL(N);
3766 
3767   // fold (rem c1, c2) -> c1%c2
3768   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3769   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3770   if (N0C && N1C)
3771     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3772       return Folded;
  // fold (urem X, -1) -> select(X == -1, 0, X)
3774   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3775     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3776                          DAG.getConstant(0, DL, VT), N0);
3777 
3778   if (SDValue V = simplifyDivRem(N, DAG))
3779     return V;
3780 
3781   if (SDValue NewSel = foldBinOpIntoSelect(N))
3782     return NewSel;
3783 
3784   if (isSigned) {
3785     // If we know the sign bits of both operands are zero, strength reduce to a
3786     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3787     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3788       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3789   } else {
3790     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3791     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3792       // fold (urem x, pow2) -> (and x, pow2-1)
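      // e.g. (urem x, 8) -> (and x, 7)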
3793       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3794       AddToWorklist(Add.getNode());
3795       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3796     }
3797     if (N1.getOpcode() == ISD::SHL &&
3798         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3799       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3800       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3801       AddToWorklist(Add.getNode());
3802       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3803     }
3804   }
3805 
3806   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3807 
3808   // If X/C can be simplified by the division-by-constant logic, lower
3809   // X%C to the equivalent of X-X/C*C.
3810   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3811   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3812   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3813   // combine will not return a DIVREM.  Regardless, checking cheapness here
3814   // makes sense since the simplification results in fatter code.
3815   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3816     SDValue OptimizedDiv =
3817         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3818     if (OptimizedDiv.getNode()) {
3819       // If the equivalent Div node also exists, update its users.
3820       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3821       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3822                                                 { N0, N1 }))
3823         CombineTo(DivNode, OptimizedDiv);
3824       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3825       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3826       AddToWorklist(OptimizedDiv.getNode());
3827       AddToWorklist(Mul.getNode());
3828       return Sub;
3829     }
3830   }
3831 
  // sdiv/udiv, srem/urem -> sdivrem/udivrem
3833   if (SDValue DivRem = useDivRem(N))
3834     return DivRem.getValue(1);
3835 
3836   return SDValue();
3837 }
3838 
3839 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3840   SDValue N0 = N->getOperand(0);
3841   SDValue N1 = N->getOperand(1);
3842   EVT VT = N->getValueType(0);
3843   SDLoc DL(N);
3844 
3845   if (VT.isVector()) {
3846     // fold (mulhs x, 0) -> 0
3847     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3848       return N1;
3849     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3850       return N0;
3851   }
3852 
3853   // fold (mulhs x, 0) -> 0
3854   if (isNullConstant(N1))
3855     return N1;
3856   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3857   if (isOneConstant(N1))
3858     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3859                        DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3860                                        getShiftAmountTy(N0.getValueType())));
3861 
3862   // fold (mulhs x, undef) -> 0
3863   if (N0.isUndef() || N1.isUndef())
3864     return DAG.getConstant(0, DL, VT);
3865 
3866   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3867   // plus a shift.
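  // e.g. for i16 with a legal i32 multiply:
  // (mulhs x, y) -> (trunc (srl (mul (sext x), (sext y)), 16)).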
3868   if (VT.isSimple() && !VT.isVector()) {
3869     MVT Simple = VT.getSimpleVT();
3870     unsigned SimpleSize = Simple.getSizeInBits();
3871     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3872     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3873       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3874       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3875       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3876       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3877             DAG.getConstant(SimpleSize, DL,
3878                             getShiftAmountTy(N1.getValueType())));
3879       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3880     }
3881   }
3882 
3883   return SDValue();
3884 }
3885 
3886 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3887   SDValue N0 = N->getOperand(0);
3888   SDValue N1 = N->getOperand(1);
3889   EVT VT = N->getValueType(0);
3890   SDLoc DL(N);
3891 
3892   if (VT.isVector()) {
3893     // fold (mulhu x, 0) -> 0
3894     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3895       return N1;
3896     if (ISD::isBuildVectorAllZeros(N0.getNode()))
3897       return N0;
3898   }
3899 
3900   // fold (mulhu x, 0) -> 0
3901   if (isNullConstant(N1))
3902     return N1;
3903   // fold (mulhu x, 1) -> 0
3904   if (isOneConstant(N1))
3905     return DAG.getConstant(0, DL, N0.getValueType());
3906   // fold (mulhu x, undef) -> 0
3907   if (N0.isUndef() || N1.isUndef())
3908     return DAG.getConstant(0, DL, VT);
3909 
3910   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
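  // e.g. (mulhu x, 16) on i32 -> (srl x, 28): the high half of x * 2^4 is
  // x >> (32 - 4).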
3911   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3912       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3913     unsigned NumEltBits = VT.getScalarSizeInBits();
3914     SDValue LogBase2 = BuildLogBase2(N1, DL);
3915     SDValue SRLAmt = DAG.getNode(
3916         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3917     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3918     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3919     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3920   }
3921 
3922   // If the type twice as wide is legal, transform the mulhu to a wider multiply
3923   // plus a shift.
3924   if (VT.isSimple() && !VT.isVector()) {
3925     MVT Simple = VT.getSimpleVT();
3926     unsigned SimpleSize = Simple.getSizeInBits();
3927     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3928     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3929       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3930       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3931       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3932       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3933             DAG.getConstant(SimpleSize, DL,
3934                             getShiftAmountTy(N1.getValueType())));
3935       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3936     }
3937   }
3938 
3939   return SDValue();
3940 }
3941 
3942 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the simplified value if a simplification was made.
3945 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3946                                                 unsigned HiOp) {
3947   // If the high half is not needed, just compute the low half.
3948   bool HiExists = N->hasAnyUseOfValue(1);
3949   if (!HiExists && (!LegalOperations ||
3950                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3951     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3952     return CombineTo(N, Res, Res);
3953   }
3954 
3955   // If the low half is not needed, just compute the high half.
3956   bool LoExists = N->hasAnyUseOfValue(0);
3957   if (!LoExists && (!LegalOperations ||
3958                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3959     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3960     return CombineTo(N, Res, Res);
3961   }
3962 
  // If both halves are used, return the node as-is.
3964   if (LoExists && HiExists)
3965     return SDValue();
3966 
3967   // If the two computed results can be simplified separately, separate them.
3968   if (LoExists) {
3969     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3970     AddToWorklist(Lo.getNode());
3971     SDValue LoOpt = combine(Lo.getNode());
3972     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3973         (!LegalOperations ||
3974          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3975       return CombineTo(N, LoOpt, LoOpt);
3976   }
3977 
3978   if (HiExists) {
3979     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3980     AddToWorklist(Hi.getNode());
3981     SDValue HiOpt = combine(Hi.getNode());
3982     if (HiOpt.getNode() && HiOpt != Hi &&
3983         (!LegalOperations ||
3984          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3985       return CombineTo(N, HiOpt, HiOpt);
3986   }
3987 
3988   return SDValue();
3989 }
3990 
3991 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3992   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3993     return Res;
3994 
3995   EVT VT = N->getValueType(0);
3996   SDLoc DL(N);
3997 
  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
4000   if (VT.isSimple() && !VT.isVector()) {
4001     MVT Simple = VT.getSimpleVT();
4002     unsigned SimpleSize = Simple.getSizeInBits();
4003     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4004     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4005       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4006       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4007       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result value 1).
4009       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4010             DAG.getConstant(SimpleSize, DL,
4011                             getShiftAmountTy(Lo.getValueType())));
4012       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half of the result.
4014       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4015       return CombineTo(N, Lo, Hi);
4016     }
4017   }
4018 
4019   return SDValue();
4020 }
4021 
4022 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4023   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4024     return Res;
4025 
4026   EVT VT = N->getValueType(0);
4027   SDLoc DL(N);
4028 
  // If a type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
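  // For example (illustrative), with VT = i32 and a legal i64 multiply:
  //   (umul_lohi x, y) -->
  //     t  = (mul (zext x to i64), (zext y to i64))
  //     lo = (trunc t to i32)
  //     hi = (trunc (srl t, 32) to i32)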
4031   if (VT.isSimple() && !VT.isVector()) {
4032     MVT Simple = VT.getSimpleVT();
4033     unsigned SimpleSize = Simple.getSizeInBits();
4034     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4035     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4036       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4037       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4038       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high half of the result.
4040       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4041             DAG.getConstant(SimpleSize, DL,
4042                             getShiftAmountTy(Lo.getValueType())));
4043       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half of the result.
4045       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4046       return CombineTo(N, Lo, Hi);
4047     }
4048   }
4049 
4050   return SDValue();
4051 }
4052 
4053 SDValue DAGCombiner::visitMULO(SDNode *N) {
4054   bool IsSigned = (ISD::SMULO == N->getOpcode());
4055 
4056   // (mulo x, 2) -> (addo x, x)
4057   if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
4058     if (C2->getAPIntValue() == 2)
4059       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
4060                          N->getVTList(), N->getOperand(0), N->getOperand(0));
4061 
4062   return SDValue();
4063 }
4064 
4065 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4066   SDValue N0 = N->getOperand(0);
4067   SDValue N1 = N->getOperand(1);
4068   EVT VT = N0.getValueType();
4069 
4070   // fold vector ops
4071   if (VT.isVector())
4072     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4073       return FoldedVOp;
4074 
4075   // fold operation with constant operands.
4076   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4077   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4078   if (N0C && N1C)
4079     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4080 
4081   // canonicalize constant to RHS
4082   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4083      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4084     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4085 
  // If the sign bits are known zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped one is.
4088   unsigned Opcode = N->getOpcode();
4089   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4090   if (!TLI.isOperationLegal(Opcode, VT) &&
4091       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4092       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4093     unsigned AltOpcode;
4094     switch (Opcode) {
4095     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4096     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4097     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4098     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4099     default: llvm_unreachable("Unknown MINMAX opcode");
4100     }
4101     if (TLI.isOperationLegal(AltOpcode, VT))
4102       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4103   }
4104 
4105   return SDValue();
4106 }
4107 
4108 /// If this is a bitwise logic instruction and both operands have the same
4109 /// opcode, try to sink the other opcode after the logic instruction.
4110 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4111   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4112   EVT VT = N0.getValueType();
4113   unsigned LogicOpcode = N->getOpcode();
4114   unsigned HandOpcode = N0.getOpcode();
4115   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4116           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4117   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4118 
4119   // Bail early if none of these transforms apply.
4120   if (N0.getNumOperands() == 0)
4121     return SDValue();
4122 
4123   // FIXME: We should check number of uses of the operands to not increase
4124   //        the instruction count for all transforms.
4125 
4126   // Handle size-changing casts.
4127   SDValue X = N0.getOperand(0);
4128   SDValue Y = N1.getOperand(0);
4129   EVT XVT = X.getValueType();
4130   SDLoc DL(N);
4131   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4132       HandOpcode == ISD::SIGN_EXTEND) {
4133     // If both operands have other uses, this transform would create extra
4134     // instructions without eliminating anything.
4135     if (!N0.hasOneUse() && !N1.hasOneUse())
4136       return SDValue();
4137     // We need matching integer source types.
4138     if (XVT != Y.getValueType())
4139       return SDValue();
4140     // Don't create an illegal op during or after legalization. Don't ever
4141     // create an unsupported vector op.
4142     if ((VT.isVector() || LegalOperations) &&
4143         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4144       return SDValue();
4145     // Avoid infinite looping with PromoteIntBinOp.
4146     // TODO: Should we apply desirable/legal constraints to all opcodes?
4147     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4148         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4149       return SDValue();
4150     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4151     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4152     return DAG.getNode(HandOpcode, DL, VT, Logic);
4153   }
4154 
4155   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4156   if (HandOpcode == ISD::TRUNCATE) {
4157     // If both operands have other uses, this transform would create extra
4158     // instructions without eliminating anything.
4159     if (!N0.hasOneUse() && !N1.hasOneUse())
4160       return SDValue();
4161     // We need matching source types.
4162     if (XVT != Y.getValueType())
4163       return SDValue();
4164     // Don't create an illegal op during or after legalization.
4165     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4166       return SDValue();
4167     // Be extra careful sinking truncate. If it's free, there's no benefit in
4168     // widening a binop. Also, don't create a logic op on an illegal type.
4169     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4170       return SDValue();
4171     if (!TLI.isTypeLegal(XVT))
4172       return SDValue();
4173     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4174     return DAG.getNode(HandOpcode, DL, VT, Logic);
4175   }
4176 
4177   // For binops SHL/SRL/SRA/AND:
4178   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4179   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4180        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4181       N0.getOperand(1) == N1.getOperand(1)) {
4182     // If either operand has other uses, this transform is not an improvement.
4183     if (!N0.hasOneUse() || !N1.hasOneUse())
4184       return SDValue();
4185     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4186     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4187   }
4188 
4189   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4190   if (HandOpcode == ISD::BSWAP) {
4191     // If either operand has other uses, this transform is not an improvement.
4192     if (!N0.hasOneUse() || !N1.hasOneUse())
4193       return SDValue();
4194     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4195     return DAG.getNode(HandOpcode, DL, VT, Logic);
4196   }
4197 
4198   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4199   // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4201   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4202   // we don't want to undo this promotion.
4203   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4204   // on scalars.
4205   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4206        Level <= AfterLegalizeTypes) {
4207     // Input types must be integer and the same.
4208     if (XVT.isInteger() && XVT == Y.getValueType()) {
4209       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4210       return DAG.getNode(HandOpcode, DL, VT, Logic);
4211     }
4212   }
4213 
4214   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4215   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4216   // If both shuffles use the same mask, and both shuffle within a single
4217   // vector, then it is worthwhile to move the swizzle after the operation.
4218   // The type-legalizer generates this pattern when loading illegal
4219   // vector types from memory. In many cases this allows additional shuffle
4220   // optimizations.
4221   // There are other cases where moving the shuffle after the xor/and/or
4222   // is profitable even if shuffles don't perform a swizzle.
4223   // If both shuffles use the same mask, and both shuffles have the same first
4224   // or second operand, then it might still be profitable to move the shuffle
4225   // after the xor/and/or operation.
4226   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4227     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4228     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4229     assert(X.getValueType() == Y.getValueType() &&
4230            "Inputs to shuffles are not the same type");
4231 
4232     // Check that both shuffles use the same mask. The masks are known to be of
4233     // the same length because the result vector type is the same.
4234     // Check also that shuffles have only one use to avoid introducing extra
4235     // instructions.
4236     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4237         !SVN0->getMask().equals(SVN1->getMask()))
4238       return SDValue();
4239 
4240     // Don't try to fold this node if it requires introducing a
4241     // build vector of all zeros that might be illegal at this stage.
4242     SDValue ShOp = N0.getOperand(1);
4243     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4244       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4245 
4246     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4247     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4248       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4249                                   N0.getOperand(0), N1.getOperand(0));
4250       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4251     }
4252 
4253     // Don't try to fold this node if it requires introducing a
4254     // build vector of all zeros that might be illegal at this stage.
4255     ShOp = N0.getOperand(0);
4256     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4257       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4258 
4259     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4260     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4261       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4262                                   N1.getOperand(1));
4263       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4264     }
4265   }
4266 
4267   return SDValue();
4268 }
4269 
4270 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4271 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4272                                        const SDLoc &DL) {
4273   SDValue LL, LR, RL, RR, N0CC, N1CC;
4274   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4275       !isSetCCEquivalent(N1, RL, RR, N1CC))
4276     return SDValue();
4277 
4278   assert(N0.getValueType() == N1.getValueType() &&
4279          "Unexpected operand types for bitwise logic op");
4280   assert(LL.getValueType() == LR.getValueType() &&
4281          RL.getValueType() == RR.getValueType() &&
4282          "Unexpected operand types for setcc");
4283 
4284   // If we're here post-legalization or the logic op type is not i1, the logic
4285   // op type must match a setcc result type. Also, all folds require new
4286   // operations on the left and right operands, so those types must match.
4287   EVT VT = N0.getValueType();
4288   EVT OpVT = LL.getValueType();
4289   if (LegalOperations || VT.getScalarType() != MVT::i1)
4290     if (VT != getSetCCResultType(OpVT))
4291       return SDValue();
4292   if (OpVT != RL.getValueType())
4293     return SDValue();
4294 
4295   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4296   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4297   bool IsInteger = OpVT.isInteger();
4298   if (LR == RR && CC0 == CC1 && IsInteger) {
4299     bool IsZero = isNullOrNullSplat(LR);
4300     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4301 
4302     // All bits clear?
4303     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4304     // All sign bits clear?
4305     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4306     // Any bits set?
4307     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4308     // Any sign bits set?
4309     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4310 
4311     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4312     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4313     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4314     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4315     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4316       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4317       AddToWorklist(Or.getNode());
4318       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4319     }
4320 
4321     // All bits set?
4322     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4323     // All sign bits set?
4324     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4325     // Any bits clear?
4326     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4327     // Any sign bits clear?
4328     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4329 
4330     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4331     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4332     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4334     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4335       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4336       AddToWorklist(And.getNode());
4337       return DAG.getSetCC(DL, VT, And, LR, CC1);
4338     }
4339   }
4340 
4341   // TODO: What is the 'or' equivalent of this fold?
4342   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4343   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4344       IsInteger && CC0 == ISD::SETNE &&
4345       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4346        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4347     SDValue One = DAG.getConstant(1, DL, OpVT);
4348     SDValue Two = DAG.getConstant(2, DL, OpVT);
4349     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4350     AddToWorklist(Add.getNode());
4351     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4352   }
4353 
4354   // Try more general transforms if the predicates match and the only user of
4355   // the compares is the 'and' or 'or'.
4356   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4357       N0.hasOneUse() && N1.hasOneUse()) {
4358     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4359     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4360     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4361       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4362       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4363       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4364       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4365       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4366     }
4367 
4368     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
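    // For example (illustrative), with X of type i32:
    //   and (setne X, 4), (setne X, 6) --> setne (and (add X, -4), ~2), 0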
4369     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4370       // Match a shared variable operand and 2 non-opaque constant operands.
4371       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4372       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4373       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4374         // Canonicalize larger constant as C0.
4375         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4376           std::swap(C0, C1);
4377 
4378         // The difference of the constants must be a single bit.
4379         const APInt &C0Val = C0->getAPIntValue();
4380         const APInt &C1Val = C1->getAPIntValue();
4381         if ((C0Val - C1Val).isPowerOf2()) {
4382           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4383           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4384           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4385           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4386           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4387           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4388           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4389           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4390         }
4391       }
4392     }
4393   }
4394 
4395   // Canonicalize equivalent operands to LL == RL.
4396   if (LL == RR && LR == RL) {
4397     CC1 = ISD::getSetCCSwappedOperands(CC1);
4398     std::swap(RL, RR);
4399   }
4400 
4401   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4402   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4403   if (LL == RL && LR == RR) {
4404     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4405                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4406     if (NewCC != ISD::SETCC_INVALID &&
4407         (!LegalOperations ||
4408          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4409           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4410       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4411   }
4412 
4413   return SDValue();
4414 }
4415 
4416 /// This contains all DAGCombine rules which reduce two values combined by
4417 /// an And operation to a single value. This makes them reusable in the context
4418 /// of visitSELECT(). Rules involving constants are not included as
4419 /// visitSELECT() already handles those cases.
4420 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4421   EVT VT = N1.getValueType();
4422   SDLoc DL(N);
4423 
4424   // fold (and x, undef) -> 0
4425   if (N0.isUndef() || N1.isUndef())
4426     return DAG.getConstant(0, DL, VT);
4427 
4428   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4429     return V;
4430 
4431   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4432       VT.getSizeInBits() <= 64) {
4433     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4434       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 isn't a legal
        // immediate for an add, but it becomes legal once its top c2 bits are
        // set, transform the ADD so the immediate doesn't need to be
        // materialized in a register.
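        // For example (illustrative), with c2 = 48 on i64:
        //   (and (add x, 0x123), (lshr y, 48)) -->
        //   (and (add x, 0xFFFFFFFFFFFF0123), (lshr y, 48))
        // because the lshr already zeroes the top 48 bits of the AND result.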
4439         APInt ADDC = ADDI->getAPIntValue();
4440         APInt SRLC = SRLI->getAPIntValue();
4441         if (ADDC.getMinSignedBits() <= 64 &&
4442             SRLC.ult(VT.getSizeInBits()) &&
4443             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4444           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4445                                              SRLC.getZExtValue());
4446           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4447             ADDC |= Mask;
4448             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4449               SDLoc DL0(N0);
4450               SDValue NewAdd =
4451                 DAG.getNode(ISD::ADD, DL0, VT,
4452                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4453               CombineTo(N0.getNode(), NewAdd);
4454               // Return N so it doesn't get rechecked!
4455               return SDValue(N, 0);
4456             }
4457           }
4458         }
4459       }
4460     }
4461   }
4462 
4463   // Reduce bit extract of low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
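  // For example (illustrative), with K = 8 and KMask = 0xff on i64:
  //   (and (srl i64:x, 8), 0xff) -->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), 8), 0xff))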
4466   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4467     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4468       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4469         unsigned Size = VT.getSizeInBits();
4470         const APInt &AndMask = CAnd->getAPIntValue();
4471         unsigned ShiftBits = CShift->getZExtValue();
4472 
        // Bail out; this node will probably disappear anyway.
4474         if (ShiftBits == 0)
4475           return SDValue();
4476 
4477         unsigned MaskBits = AndMask.countTrailingOnes();
4478         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4479 
4480         if (AndMask.isMask() &&
4481             // Required bits must not span the two halves of the integer and
4482             // must fit in the half size type.
4483             (ShiftBits + MaskBits <= Size / 2) &&
4484             TLI.isNarrowingProfitable(VT, HalfVT) &&
4485             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4486             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4487             TLI.isTruncateFree(VT, HalfVT) &&
4488             TLI.isZExtFree(HalfVT, VT)) {
4489           // The isNarrowingProfitable is to avoid regressions on PPC and
4490           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4491           // on downstream users of this. Those patterns could probably be
4492           // extended to handle extensions mixed in.
4493 
          SDLoc SL(N0);
4495           assert(MaskBits <= Size);
4496 
          // Extract the bits entirely from the low half of the wide value.
4498           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4499           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4500                                       N0.getOperand(0));
4501 
4502           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4503           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4504           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4505           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4506           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4507         }
4508       }
4509     }
4510   }
4511 
4512   return SDValue();
4513 }
4514 
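/// Return true if (and (load LoadN), AndC) can be implemented as a
/// zero-extending load. On success, ExtVT is set to the narrow type to load;
/// LoadResultTy is the type the extending load would produce.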
4515 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4516                                    EVT LoadResultTy, EVT &ExtVT) {
4517   if (!AndC->getAPIntValue().isMask())
4518     return false;
4519 
4520   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4521 
4522   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4523   EVT LoadedVT = LoadN->getMemoryVT();
4524 
4525   if (ExtVT == LoadedVT &&
4526       (!LegalOperations ||
4527        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4528     // ZEXTLOAD will match without needing to change the size of the value being
4529     // loaded.
4530     return true;
4531   }
4532 
4533   // Do not change the width of a volatile load.
4534   if (LoadN->isVolatile())
4535     return false;
4536 
4537   // Do not generate loads of non-round integer types since these can
4538   // be expensive (and would be wrong if the type is not byte sized).
4539   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4540     return false;
4541 
4542   if (LegalOperations &&
4543       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4544     return false;
4545 
4546   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4547     return false;
4548 
4549   return true;
4550 }
4551 
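/// Return true if it is legal and profitable to narrow the memory access of
/// LDST to MemVT, accessed at a bit offset of ShAmt from the start of the
/// original access. This summarizes the checks below: the offset must be a
/// whole number of bytes, MemVT must be round (byte sized), the narrow access
/// must stay within the original one, and the target must support the
/// resulting extending load or truncating store.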
4552 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4553                                     ISD::LoadExtType ExtType, EVT &MemVT,
4554                                     unsigned ShAmt) {
4555   if (!LDST)
4556     return false;
4557   // Only allow byte offsets.
4558   if (ShAmt % 8)
4559     return false;
4560 
4561   // Do not generate loads of non-round integer types since these can
4562   // be expensive (and would be wrong if the type is not byte sized).
4563   if (!MemVT.isRound())
4564     return false;
4565 
4566   // Don't change the width of a volatile load.
4567   if (LDST->isVolatile())
4568     return false;
4569 
4570   // Verify that we are actually reducing a load width here.
4571   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4572     return false;
4573 
4574   // Ensure that this isn't going to produce an unsupported unaligned access.
4575   if (ShAmt &&
4576       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4577                               LDST->getAddressSpace(), ShAmt / 8))
4578     return false;
4579 
4580   // It's not possible to generate a constant of extended or untyped type.
4581   EVT PtrType = LDST->getBasePtr().getValueType();
4582   if (PtrType == MVT::Untyped || PtrType.isExtended())
4583     return false;
4584 
4585   if (isa<LoadSDNode>(LDST)) {
4586     LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform a load with multiple uses; this would require adding a
    // new load.
4589     if (!SDValue(Load, 0).hasOneUse())
4590       return false;
4591 
4592     if (LegalOperations &&
4593         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4594       return false;
4595 
4596     // For the transform to be legal, the load must produce only two values
4597     // (the value loaded and the chain).  Don't transform a pre-increment
4598     // load, for example, which produces an extra value.  Otherwise the
4599     // transformation is not equivalent, and the downstream logic to replace
4600     // uses gets things wrong.
4601     if (Load->getNumValues() > 2)
4602       return false;
4603 
4604     // If the load that we're shrinking is an extload and we're not just
4605     // discarding the extension we can't simply shrink the load. Bail.
4606     // TODO: It would be possible to merge the extensions in some cases.
4607     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4608         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4609       return false;
4610 
4611     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4612       return false;
4613   } else {
    assert(isa<StoreSDNode>(LDST) && "Expected a load or a store SDNode");
4615     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4616     // Can't write outside the original store
4617     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4618       return false;
4619 
4620     if (LegalOperations &&
4621         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4622       return false;
4623   }
4624   return true;
4625 }
4626 
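// Walk the operand tree below N looking for loads that can be narrowed to the
// width of Mask, collecting them in Loads. Nodes with constant operands that
// the mask would change are recorded in NodesWithConsts, and at most one
// other node is allowed to be masked explicitly (returned in NodeToMask).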
4627 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4628                                     SmallVectorImpl<LoadSDNode*> &Loads,
4629                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4630                                     ConstantSDNode *Mask,
4631                                     SDNode *&NodeToMask) {
  // Recursively search the operands, looking for loads that can be narrowed.
4634   for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4635     SDValue Op = N->getOperand(i);
4636 
4637     if (Op.getValueType().isVector())
4638       return false;
4639 
4640     // Some constants may need fixing up later if they are too large.
4641     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4642       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4643           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4644         NodesWithConsts.insert(N);
4645       continue;
4646     }
4647 
4648     if (!Op.hasOneUse())
4649       return false;
4650 
4651     switch(Op.getOpcode()) {
4652     case ISD::LOAD: {
4653       auto *Load = cast<LoadSDNode>(Op);
4654       EVT ExtVT;
4655       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4656           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4657 
4658         // ZEXTLOAD is already small enough.
4659         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4660             ExtVT.bitsGE(Load->getMemoryVT()))
4661           continue;
4662 
        // Use bitsLE so that equal-sized loads are also converted to zext.
4664         if (ExtVT.bitsLE(Load->getMemoryVT()))
4665           Loads.push_back(Load);
4666 
4667         continue;
4668       }
4669       return false;
4670     }
4671     case ISD::ZERO_EXTEND:
4672     case ISD::AssertZext: {
4673       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4674       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4675       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4676         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4677         Op.getOperand(0).getValueType();
4678 
4679       // We can accept extending nodes if the mask is wider or an equal
4680       // width to the original type.
4681       if (ExtVT.bitsGE(VT))
4682         continue;
4683       break;
4684     }
4685     case ISD::OR:
4686     case ISD::XOR:
4687     case ISD::AND:
4688       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4689                              NodeToMask))
4690         return false;
4691       continue;
4692     }
4693 
    // Allow one node which will be masked along with any loads found.
4695     if (NodeToMask)
4696       return false;
4697 
4698     // Also ensure that the node to be masked only produces one data result.
4699     NodeToMask = Op.getNode();
4700     if (NodeToMask->getNumValues() > 1) {
4701       bool HasValue = false;
4702       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4703         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4704         if (VT != MVT::Glue && VT != MVT::Other) {
4705           if (HasValue) {
4706             NodeToMask = nullptr;
4707             return false;
4708           }
4709           HasValue = true;
4710         }
4711       }
4712       assert(HasValue && "Node to be masked has no data result?");
4713     }
4714   }
4715   return true;
4716 }
4717 
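// If N is an AND with a constant low-bit mask, try to propagate the mask back
// through the DAG to the loads feeding it, so those loads can be narrowed and
// the AND removed. A minimal sketch (illustrative):
//   (and (or (load i32 p), (load i32 q)), 0xff)
//     --> (or (zextload i8 p), (zextload i8 q))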
4718 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4719   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4720   if (!Mask)
4721     return false;
4722 
4723   if (!Mask->getAPIntValue().isMask())
4724     return false;
4725 
4726   // No need to do anything if the and directly uses a load.
4727   if (isa<LoadSDNode>(N->getOperand(0)))
4728     return false;
4729 
4730   SmallVector<LoadSDNode*, 8> Loads;
4731   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4732   SDNode *FixupNode = nullptr;
4733   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4734     if (Loads.size() == 0)
4735       return false;
4736 
4737     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4738     SDValue MaskOp = N->getOperand(1);
4739 
    // If it exists, fix up the single node we allow in the tree that needs
    // masking.
4742     if (FixupNode) {
4743       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4744       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4745                                 FixupNode->getValueType(0),
4746                                 SDValue(FixupNode, 0), MaskOp);
4747       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
4749         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4750     }
4751 
4752     // Narrow any constants that need it.
4753     for (auto *LogicN : NodesWithConsts) {
4754       SDValue Op0 = LogicN->getOperand(0);
4755       SDValue Op1 = LogicN->getOperand(1);
4756 
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);
4759 
4760       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4761                                 Op1, MaskOp);
4762 
4763       DAG.UpdateNodeOperands(LogicN, Op0, And);
4764     }
4765 
4766     // Create narrow loads.
4767     for (auto *Load : Loads) {
4768       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4769       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4770                                 SDValue(Load, 0), MaskOp);
4771       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
4773         And = SDValue(
4774             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4775       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4776       assert(NewLoad &&
4777              "Shouldn't be masking the load if it can't be narrowed");
4778       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4779     }
4780     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4781     return true;
4782   }
4783   return false;
4784 }
4785 
4786 // Unfold
4787 //    x &  (-1 'logical shift' y)
4788 // To
4789 //    (x 'opposite logical shift' y) 'logical shift' y
4790 // if it is better for performance.
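// For example (illustrative): x & (-1 << y) --> (x >> y) << y, which clears
// the low y bits of x without materializing the mask constant.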
4791 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4792   assert(N->getOpcode() == ISD::AND);
4793 
4794   SDValue N0 = N->getOperand(0);
4795   SDValue N1 = N->getOperand(1);
4796 
  // Do we actually prefer shifts over a mask?
4798   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4799     return SDValue();
4800 
4801   // Try to match  (-1 '[outer] logical shift' y)
4802   unsigned OuterShift;
4803   unsigned InnerShift; // The opposite direction to the OuterShift.
4804   SDValue Y;           // Shift amount.
4805   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4806     if (!M.hasOneUse())
4807       return false;
4808     OuterShift = M->getOpcode();
4809     if (OuterShift == ISD::SHL)
4810       InnerShift = ISD::SRL;
4811     else if (OuterShift == ISD::SRL)
4812       InnerShift = ISD::SHL;
4813     else
4814       return false;
4815     if (!isAllOnesConstant(M->getOperand(0)))
4816       return false;
4817     Y = M->getOperand(1);
4818     return true;
4819   };
4820 
4821   SDValue X;
4822   if (matchMask(N1))
4823     X = N0;
4824   else if (matchMask(N0))
4825     X = N1;
4826   else
4827     return SDValue();
4828 
4829   SDLoc DL(N);
4830   EVT VT = N->getValueType(0);
4831 
4832   //     tmp = x   'opposite logical shift' y
4833   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4834   //     ret = tmp 'logical shift' y
4835   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4836 
4837   return T1;
4838 }
4839 
4840 SDValue DAGCombiner::visitAND(SDNode *N) {
4841   SDValue N0 = N->getOperand(0);
4842   SDValue N1 = N->getOperand(1);
4843   EVT VT = N1.getValueType();
4844 
4845   // x & x --> x
4846   if (N0 == N1)
4847     return N0;
4848 
4849   // fold vector ops
4850   if (VT.isVector()) {
4851     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4852       return FoldedVOp;
4853 
4854     // fold (and x, 0) -> 0, vector edition
4855     if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // Do not return N0, because an undef node may exist in N0.
4857       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4858                              SDLoc(N), N0.getValueType());
4859     if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // Do not return N1, because an undef node may exist in N1.
4861       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4862                              SDLoc(N), N1.getValueType());
4863 
4864     // fold (and x, -1) -> x, vector edition
4865     if (ISD::isBuildVectorAllOnes(N0.getNode()))
4866       return N1;
4867     if (ISD::isBuildVectorAllOnes(N1.getNode()))
4868       return N0;
4869   }
4870 
4871   // fold (and c1, c2) -> c1&c2
4872   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4873   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4874   if (N0C && N1C && !N1C->isOpaque())
4875     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4876   // canonicalize constant to RHS
4877   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4878       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4879     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4880   // fold (and x, -1) -> x
4881   if (isAllOnesConstant(N1))
4882     return N0;
4883   // if (and x, c) is known to be zero, return 0
4884   unsigned BitWidth = VT.getScalarSizeInBits();
4885   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4886                                    APInt::getAllOnesValue(BitWidth)))
4887     return DAG.getConstant(0, SDLoc(N), VT);
4888 
4889   if (SDValue NewSel = foldBinOpIntoSelect(N))
4890     return NewSel;
4891 
4892   // reassociate and
4893   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4894     return RAND;
4895 
4896   // Try to convert a constant mask AND into a shuffle clear mask.
4897   if (VT.isVector())
4898     if (SDValue Shuffle = XformToShuffleWithZero(N))
4899       return Shuffle;
4900 
4901   // fold (and (or x, C), D) -> D if (C & D) == D
4902   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4903     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4904   };
4905   if (N0.getOpcode() == ISD::OR &&
4906       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4907     return N1;
4908   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4909   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4910     SDValue N0Op0 = N0.getOperand(0);
4911     APInt Mask = ~N1C->getAPIntValue();
4912     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4913     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4914       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4915                                  N0.getValueType(), N0Op0);
4916 
4917       // Replace uses of the AND with uses of the Zero extend node.
4918       CombineTo(N, Zext);
4919 
4920       // We actually want to replace all uses of the any_extend with the
4921       // zero_extend, to avoid duplicating things.  This will later cause this
4922       // AND to be folded.
4923       CombineTo(N0.getNode(), Zext);
4924       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4925     }
4926   }
  // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits that must
  // already be zero by virtue of the width of the base type of the load.
  //
  // The 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
4933   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4934        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4935        N0.getOperand(0).getOpcode() == ISD::LOAD &&
4936        N0.getOperand(0).getResNo() == 0) ||
4937       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4938     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4939                                          N0 : N0.getOperand(0) );
4940 
4941     // Get the constant (if applicable) the zero'th operand is being ANDed with.
4942     // This can be a pure constant or a vector splat, in which case we treat the
4943     // vector as a scalar and use the splat value.
4944     APInt Constant = APInt::getNullValue(1);
4945     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4946       Constant = C->getAPIntValue();
4947     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4948       APInt SplatValue, SplatUndef;
4949       unsigned SplatBitSize;
4950       bool HasAnyUndefs;
4951       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4952                                              SplatBitSize, HasAnyUndefs);
4953       if (IsSplat) {
4954         // Undef bits can contribute to a possible optimisation if set, so
4955         // set them.
4956         SplatValue |= SplatUndef;
4957 
4958         // The splat value may be something like "0x00FFFFFF", which means 0 for
4959         // the first vector value and FF for the rest, repeating. We need a mask
4960         // that will apply equally to all members of the vector, so AND all the
4961         // lanes of the constant together.
4962         EVT VT = Vector->getValueType(0);
4963         unsigned BitWidth = VT.getScalarSizeInBits();
4964 
4965         // If the splat value has been compressed to a bitlength lower
4966         // than the size of the vector lane, we need to re-expand it to
4967         // the lane size.
4968         if (BitWidth > SplatBitSize)
4969           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4970                SplatBitSize < BitWidth;
4971                SplatBitSize = SplatBitSize * 2)
4972             SplatValue |= SplatValue.shl(SplatBitSize);
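        // For example (illustrative): SplatValue 0xAB with SplatBitSize 8 in
        // a lane of BitWidth 32 re-expands to 0xABABABAB.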
4973 
4974         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4975         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4976         if (SplatBitSize % BitWidth == 0) {
4977           Constant = APInt::getAllOnesValue(BitWidth);
4978           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4979             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4980         }
4981       }
4982     }
4983 
4984     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4985     // actually legal and isn't going to get expanded, else this is a false
4986     // optimisation.
4987     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4988                                                     Load->getValueType(0),
4989                                                     Load->getMemoryVT());
4990 
4991     // Resize the constant to the same size as the original memory access before
4992     // extension. If it is still the AllOnesValue then this AND is completely
4993     // unneeded.
4994     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4995 
4996     bool B;
4997     switch (Load->getExtensionType()) {
4998     default: B = false; break;
4999     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5000     case ISD::ZEXTLOAD:
5001     case ISD::NON_EXTLOAD: B = true; break;
5002     }
5003 
5004     if (B && Constant.isAllOnesValue()) {
5005       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5006       // preserve semantics once we get rid of the AND.
5007       SDValue NewLoad(Load, 0);
5008 
5009       // Fold the AND away. NewLoad may get replaced immediately.
5010       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5011 
5012       if (Load->getExtensionType() == ISD::EXTLOAD) {
5013         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5014                               Load->getValueType(0), SDLoc(Load),
5015                               Load->getChain(), Load->getBasePtr(),
5016                               Load->getOffset(), Load->getMemoryVT(),
5017                               Load->getMemOperand());
5018         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5019         if (Load->getNumValues() == 3) {
5020           // PRE/POST_INC loads have 3 values.
5021           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5022                            NewLoad.getValue(2) };
5023           CombineTo(Load, To, 3, true);
5024         } else {
5025           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5026         }
5027       }
5028 
5029       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5030     }
5031   }
5032 
5033   // fold (and (load x), 255) -> (zextload x, i8)
5034   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5035   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5036   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5037                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5038                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5039     if (SDValue Res = ReduceLoadWidth(N)) {
5040       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5041         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5042       AddToWorklist(N);
5043       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5044       return SDValue(N, 0);
5045     }
5046   }
5047 
5048   if (Level >= AfterLegalizeTypes) {
5049     // Attempt to propagate the AND back up to the leaves which, if they're
5050     // loads, can be combined to narrow loads and the AND node can be removed.
5051     // Perform after legalization so that extend nodes will already be
5052     // combined into the loads.
5053     if (BackwardsPropagateMask(N, DAG)) {
5054       return SDValue(N, 0);
5055     }
5056   }
5057 
5058   if (SDValue Combined = visitANDLike(N0, N1, N))
5059     return Combined;
5060 
5061   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5062   if (N0.getOpcode() == N1.getOpcode())
5063     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5064       return V;
5065 
5066   // Masking the negated extension of a boolean is just the zero-extended
5067   // boolean:
5068   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5069   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5070   //
5071   // Note: the SimplifyDemandedBits fold below can make an information-losing
5072   // transform, and then we have no way to find this better fold.
5073   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5074     if (isNullOrNullSplat(N0.getOperand(0))) {
5075       SDValue SubRHS = N0.getOperand(1);
5076       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5077           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5078         return SubRHS;
5079       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5080           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5081         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5082     }
5083   }
5084 
5085   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5086   // fold (and (sra)) -> (and (srl)) when possible.
5087   if (SimplifyDemandedBits(SDValue(N, 0)))
5088     return SDValue(N, 0);
5089 
5090   // fold (zext_inreg (extload x)) -> (zextload x)
5091   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
5092     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5093     EVT MemVT = LN0->getMemoryVT();
5094     // If we zero all the possible extended bits, then we can turn this into
5095     // a zextload if we are running before legalize or the operation is legal.
5096     unsigned BitWidth = N1.getScalarValueSizeInBits();
5097     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5098                            BitWidth - MemVT.getScalarSizeInBits())) &&
5099         ((!LegalOperations && !LN0->isVolatile()) ||
5100          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5101       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5102                                        LN0->getChain(), LN0->getBasePtr(),
5103                                        MemVT, LN0->getMemOperand());
5104       AddToWorklist(N);
5105       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5106       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5107     }
5108   }
5109   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5110   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
5111       N0.hasOneUse()) {
5112     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5113     EVT MemVT = LN0->getMemoryVT();
5114     // If we zero all the possible extended bits, then we can turn this into
5115     // a zextload if we are running before legalize or the operation is legal.
5116     unsigned BitWidth = N1.getScalarValueSizeInBits();
5117     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5118                            BitWidth - MemVT.getScalarSizeInBits())) &&
5119         ((!LegalOperations && !LN0->isVolatile()) ||
5120          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5121       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5122                                        LN0->getChain(), LN0->getBasePtr(),
5123                                        MemVT, LN0->getMemOperand());
5124       AddToWorklist(N);
5125       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5126       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5127     }
5128   }
5129   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5130   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5131     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5132                                            N0.getOperand(1), false))
5133       return BSwap;
5134   }
5135 
5136   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5137     return Shifts;
5138 
5139   return SDValue();
5140 }
5141 
5142 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5143 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5144                                         bool DemandHighBits) {
5145   if (!LegalOperations)
5146     return SDValue();
5147 
5148   EVT VT = N->getValueType(0);
5149   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5150     return SDValue();
5151   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5152     return SDValue();
5153 
5154   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5155   bool LookPassAnd0 = false;
5156   bool LookPassAnd1 = false;
5157   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5158       std::swap(N0, N1);
5159   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5160       std::swap(N0, N1);
5161   if (N0.getOpcode() == ISD::AND) {
5162     if (!N0.getNode()->hasOneUse())
5163       return SDValue();
5164     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5165     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5166     // This is needed for X86.
5167     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5168                   N01C->getZExtValue() != 0xFFFF))
5169       return SDValue();
5170     N0 = N0.getOperand(0);
5171     LookPassAnd0 = true;
5172   }
5173 
5174   if (N1.getOpcode() == ISD::AND) {
5175     if (!N1.getNode()->hasOneUse())
5176       return SDValue();
5177     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5178     if (!N11C || N11C->getZExtValue() != 0xFF)
5179       return SDValue();
5180     N1 = N1.getOperand(0);
5181     LookPassAnd1 = true;
5182   }
5183 
5184   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5185     std::swap(N0, N1);
5186   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5187     return SDValue();
5188   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5189     return SDValue();
5190 
5191   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5192   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5193   if (!N01C || !N11C)
5194     return SDValue();
5195   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5196     return SDValue();
5197 
5198   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5199   SDValue N00 = N0->getOperand(0);
5200   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5201     if (!N00.getNode()->hasOneUse())
5202       return SDValue();
5203     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5204     if (!N001C || N001C->getZExtValue() != 0xFF)
5205       return SDValue();
5206     N00 = N00.getOperand(0);
5207     LookPassAnd0 = true;
5208   }
5209 
5210   SDValue N10 = N1->getOperand(0);
5211   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5212     if (!N10.getNode()->hasOneUse())
5213       return SDValue();
5214     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5215     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5216     // for X86.
5217     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5218                    N101C->getZExtValue() != 0xFFFF))
5219       return SDValue();
5220     N10 = N10.getOperand(0);
5221     LookPassAnd1 = true;
5222   }
5223 
5224   if (N00 != N10)
5225     return SDValue();
5226 
5227   // Make sure everything beyond the low halfword gets set to zero since the SRL
5228   // 16 will clear the top bits.
5229   unsigned OpSizeInBits = VT.getSizeInBits();
5230   if (DemandHighBits && OpSizeInBits > 16) {
5231     // If the left-shift isn't masked out then the only way this is a bswap is
5232     // if all bits beyond the low 8 are 0. In that case the entire pattern
5233     // reduces to a left shift anyway: leave it for other parts of the combiner.
5234     if (!LookPassAnd0)
5235       return SDValue();
5236 
5237     // However, if the right shift isn't masked out then it might be because
5238     // it's not needed. See if we can spot that too.
5239     if (!LookPassAnd1 &&
5240         !DAG.MaskedValueIsZero(
5241             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5242       return SDValue();
5243   }
5244 
5245   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5246   if (OpSizeInBits > 16) {
5247     SDLoc DL(N);
5248     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5249                       DAG.getConstant(OpSizeInBits - 16, DL,
5250                                       getShiftAmountTy(VT)));
5251   }
5252   return Res;
5253 }
5254 
5255 /// Return true if the specified node is an element that makes up a 32-bit
5256 /// packed halfword byteswap.
5257 /// ((x & 0x000000ff) << 8) |
5258 /// ((x & 0x0000ff00) >> 8) |
5259 /// ((x & 0x00ff0000) << 8) |
5260 /// ((x & 0xff000000) >> 8)
5261 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5262   if (!N.getNode()->hasOneUse())
5263     return false;
5264 
5265   unsigned Opc = N.getOpcode();
5266   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5267     return false;
5268 
5269   SDValue N0 = N.getOperand(0);
5270   unsigned Opc0 = N0.getOpcode();
5271   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5272     return false;
5273 
5274   ConstantSDNode *N1C = nullptr;
5275   // SHL or SRL: look upstream for AND mask operand
5276   if (Opc == ISD::AND)
5277     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5278   else if (Opc0 == ISD::AND)
5279     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5280   if (!N1C)
5281     return false;
5282 
5283   unsigned MaskByteOffset;
5284   switch (N1C->getZExtValue()) {
5285   default:
5286     return false;
5287   case 0xFF:       MaskByteOffset = 0; break;
5288   case 0xFF00:     MaskByteOffset = 1; break;
5289   case 0xFFFF:
5290     // In case demanded bits didn't clear the bits that will be shifted out.
5291     // This is needed for X86.
5292     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5293       MaskByteOffset = 1;
5294       break;
5295     }
5296     return false;
5297   case 0xFF0000:   MaskByteOffset = 2; break;
5298   case 0xFF000000: MaskByteOffset = 3; break;
5299   }
5300 
5301   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5302   if (Opc == ISD::AND) {
5303     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5304       // (x >> 8) & 0xff
5305       // (x >> 8) & 0xff0000
5306       if (Opc0 != ISD::SRL)
5307         return false;
5308       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5309       if (!C || C->getZExtValue() != 8)
5310         return false;
5311     } else {
5312       // (x << 8) & 0xff00
5313       // (x << 8) & 0xff000000
5314       if (Opc0 != ISD::SHL)
5315         return false;
5316       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5317       if (!C || C->getZExtValue() != 8)
5318         return false;
5319     }
5320   } else if (Opc == ISD::SHL) {
5321     // (x & 0xff) << 8
5322     // (x & 0xff0000) << 8
5323     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5324       return false;
5325     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5326     if (!C || C->getZExtValue() != 8)
5327       return false;
5328   } else { // Opc == ISD::SRL
5329     // (x & 0xff00) >> 8
5330     // (x & 0xff000000) >> 8
5331     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5332       return false;
5333     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5334     if (!C || C->getZExtValue() != 8)
5335       return false;
5336   }
5337 
5338   if (Parts[MaskByteOffset])
5339     return false;
5340 
5341   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5342   return true;
5343 }
5344 
5345 /// Match a 32-bit packed halfword bswap. That is
5346 /// ((x & 0x000000ff) << 8) |
5347 /// ((x & 0x0000ff00) >> 8) |
5348 /// ((x & 0x00ff0000) << 8) |
5349 /// ((x & 0xff000000) >> 8)
5350 /// => (rotl (bswap x), 16)
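/// For example, x == 0xAABBCCDD gives
///   0x0000DD00 | 0x000000CC | 0xBB000000 | 0x00AA0000 == 0xBBAADDCC,
/// which equals (rotl (bswap 0xAABBCCDD), 16) == (rotl 0xDDCCBBAA, 16).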
5351 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5352   if (!LegalOperations)
5353     return SDValue();
5354 
5355   EVT VT = N->getValueType(0);
5356   if (VT != MVT::i32)
5357     return SDValue();
5358   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5359     return SDValue();
5360 
5361   // Look for either
5362   // (or (or (and), (and)), (or (and), (and)))
5363   // (or (or (or (and), (and)), (and)), (and))
5364   if (N0.getOpcode() != ISD::OR)
5365     return SDValue();
5366   SDValue N00 = N0.getOperand(0);
5367   SDValue N01 = N0.getOperand(1);
5368   SDNode *Parts[4] = {};
5369 
5370   if (N1.getOpcode() == ISD::OR &&
5371       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5372     // (or (or (and), (and)), (or (and), (and)))
5373     if (!isBSwapHWordElement(N00, Parts))
5374       return SDValue();
5375 
5376     if (!isBSwapHWordElement(N01, Parts))
5377       return SDValue();
5378     SDValue N10 = N1.getOperand(0);
5379     if (!isBSwapHWordElement(N10, Parts))
5380       return SDValue();
5381     SDValue N11 = N1.getOperand(1);
5382     if (!isBSwapHWordElement(N11, Parts))
5383       return SDValue();
5384   } else {
5385     // (or (or (or (and), (and)), (and)), (and))
5386     if (!isBSwapHWordElement(N1, Parts))
5387       return SDValue();
5388     if (!isBSwapHWordElement(N01, Parts))
5389       return SDValue();
5390     if (N00.getOpcode() != ISD::OR)
5391       return SDValue();
5392     SDValue N000 = N00.getOperand(0);
5393     if (!isBSwapHWordElement(N000, Parts))
5394       return SDValue();
5395     SDValue N001 = N00.getOperand(1);
5396     if (!isBSwapHWordElement(N001, Parts))
5397       return SDValue();
5398   }
5399 
5400   // Make sure the parts are all coming from the same node.
5401   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5402     return SDValue();
5403 
5404   SDLoc DL(N);
5405   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5406                               SDValue(Parts[0], 0));
5407 
5408   // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
5410   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5411   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5412     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5413   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5414     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5415   return DAG.getNode(ISD::OR, DL, VT,
5416                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5417                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5418 }
5419 
5420 /// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value; \see visitANDLike().
5422 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5423   EVT VT = N1.getValueType();
5424   SDLoc DL(N);
5425 
5426   // fold (or x, undef) -> -1
5427   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5428     return DAG.getAllOnesConstant(DL, VT);
5429 
5430   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5431     return V;
5432 
5433   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5434   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5435       // Don't increase # computations.
5436       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5437     // We can only do this xform if we know that bits from X that are set in C2
5438     // but not in C1 are already zero.  Likewise for Y.
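    // E.g. (or (and X, 0xFF00), (and Y, 0x00FF)) -> (and (or X, Y), 0xFFFF)
    // when the low byte of X and the high byte of Y are known to be zero.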
5439     if (const ConstantSDNode *N0O1C =
5440         getAsNonOpaqueConstant(N0.getOperand(1))) {
5441       if (const ConstantSDNode *N1O1C =
5442           getAsNonOpaqueConstant(N1.getOperand(1))) {
5445         const APInt &LHSMask = N0O1C->getAPIntValue();
5446         const APInt &RHSMask = N1O1C->getAPIntValue();
5447 
5448         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5449             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5450           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5451                                   N0.getOperand(0), N1.getOperand(0));
5452           return DAG.getNode(ISD::AND, DL, VT, X,
5453                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5454         }
5455       }
5456     }
5457   }
5458 
5459   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5460   if (N0.getOpcode() == ISD::AND &&
5461       N1.getOpcode() == ISD::AND &&
5462       N0.getOperand(0) == N1.getOperand(0) &&
5463       // Don't increase # computations.
5464       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5465     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5466                             N0.getOperand(1), N1.getOperand(1));
5467     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5468   }
5469 
5470   return SDValue();
5471 }
5472 
5473 /// OR combines for which the commuted variant will be tried as well.
5474 static SDValue visitORCommutative(
5475     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5476   EVT VT = N0.getValueType();
5477   if (N0.getOpcode() == ISD::AND) {
5478     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5479     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5480       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5481 
5482     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5483     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5484       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5485   }
5486 
5487   return SDValue();
5488 }
5489 
5490 SDValue DAGCombiner::visitOR(SDNode *N) {
5491   SDValue N0 = N->getOperand(0);
5492   SDValue N1 = N->getOperand(1);
5493   EVT VT = N1.getValueType();
5494 
5495   // x | x --> x
5496   if (N0 == N1)
5497     return N0;
5498 
5499   // fold vector ops
5500   if (VT.isVector()) {
5501     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5502       return FoldedVOp;
5503 
5504     // fold (or x, 0) -> x, vector edition
5505     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5506       return N1;
5507     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5508       return N0;
5509 
5510     // fold (or x, -1) -> -1, vector edition
5511     if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because an undef node may exist in N0
5513       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5514     if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because an undef node may exist in N1
5516       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5517 
5518     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5519     // Do this only if the resulting shuffle is legal.
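    // E.g. with 4 elements, where mask index 4 selects the zero vector:
    //   (or (shuf A, Zero, <0,4,2,4>), (shuf B, Zero, <4,1,4,3>))
    //     -> (shuf A, B, <0,5,2,7>)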
5520     if (isa<ShuffleVectorSDNode>(N0) &&
5521         isa<ShuffleVectorSDNode>(N1) &&
5522         // Avoid folding a node with illegal type.
5523         TLI.isTypeLegal(VT)) {
5524       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5525       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5526       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5527       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5528       // Ensure both shuffles have a zero input.
5529       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5530         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5531         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5532         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5533         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5534         bool CanFold = true;
5535         int NumElts = VT.getVectorNumElements();
5536         SmallVector<int, 4> Mask(NumElts);
5537 
5538         for (int i = 0; i != NumElts; ++i) {
5539           int M0 = SV0->getMaskElt(i);
5540           int M1 = SV1->getMaskElt(i);
5541 
5542           // Determine if either index is pointing to a zero vector.
5543           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5544           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5545 
          // If one element is zero and the other side is undef, keep undef.
5547           // This also handles the case that both are undef.
5548           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5549             Mask[i] = -1;
5550             continue;
5551           }
5552 
5553           // Make sure only one of the elements is zero.
5554           if (M0Zero == M1Zero) {
5555             CanFold = false;
5556             break;
5557           }
5558 
5559           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5560 
5561           // We have a zero and non-zero element. If the non-zero came from
5562           // SV0 make the index a LHS index. If it came from SV1, make it
5563           // a RHS index. We need to mod by NumElts because we don't care
5564           // which operand it came from in the original shuffles.
5565           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5566         }
5567 
5568         if (CanFold) {
5569           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5570           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5571 
5572           bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5573           if (!LegalMask) {
5574             std::swap(NewLHS, NewRHS);
5575             ShuffleVectorSDNode::commuteMask(Mask);
5576             LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5577           }
5578 
5579           if (LegalMask)
5580             return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5581         }
5582       }
5583     }
5584   }
5585 
5586   // fold (or c1, c2) -> c1|c2
5587   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5588   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5589   if (N0C && N1C && !N1C->isOpaque())
5590     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5591   // canonicalize constant to RHS
5592   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5593      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5594     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5595   // fold (or x, 0) -> x
5596   if (isNullConstant(N1))
5597     return N0;
5598   // fold (or x, -1) -> -1
5599   if (isAllOnesConstant(N1))
5600     return N1;
5601 
5602   if (SDValue NewSel = foldBinOpIntoSelect(N))
5603     return NewSel;
5604 
5605   // fold (or x, c) -> c iff (x & ~c) == 0
5606   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5607     return N1;
5608 
5609   if (SDValue Combined = visitORLike(N0, N1, N))
5610     return Combined;
5611 
  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
5613   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5614     return BSwap;
5615   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5616     return BSwap;
5617 
5618   // reassociate or
5619   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5620     return ROR;
5621 
5622   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5623   // iff (c1 & c2) != 0 or c1/c2 are undef.
5624   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5625     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5626   };
5627   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5628       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5629     if (SDValue COR = DAG.FoldConstantArithmetic(
5630             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5631       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5632       AddToWorklist(IOR.getNode());
5633       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5634     }
5635   }
5636 
5637   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5638     return Combined;
5639   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5640     return Combined;
5641 
5642   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5643   if (N0.getOpcode() == N1.getOpcode())
5644     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5645       return V;
5646 
5647   // See if this is some rotate idiom.
5648   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5649     return SDValue(Rot, 0);
5650 
5651   if (SDValue Load = MatchLoadCombine(N))
5652     return Load;
5653 
5654   // Simplify the operands using demanded-bits information.
5655   if (SimplifyDemandedBits(SDValue(N, 0)))
5656     return SDValue(N, 0);
5657 
5658   // If OR can be rewritten into ADD, try combines based on ADD.
5659   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5660       DAG.haveNoCommonBitsSet(N0, N1))
5661     if (SDValue Combined = visitADDLike(N))
5662       return Combined;
5663 
5664   return SDValue();
5665 }
5666 
5667 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5668   if (Op.getOpcode() == ISD::AND &&
5669       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5670     Mask = Op.getOperand(1);
5671     return Op.getOperand(0);
5672   }
5673   return Op;
5674 }
5675 
5676 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5677 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5678                             SDValue &Mask) {
5679   Op = stripConstantMask(DAG, Op, Mask);
5680   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5681     Shift = Op;
5682     return true;
5683   }
5684   return false;
5685 }
5686 
5687 /// Helper function for visitOR to extract the needed side of a rotate idiom
5688 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5689 /// InstCombine merged some outside op with one of the shifts from
5690 /// the rotate pattern.
5691 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5692 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5693 /// patterns:
5694 ///
5695 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5696 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5697 ///
5698 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5699 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5700 ///
5701 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5702 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5703 ///
5704 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5705 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5706 ///
5707 /// Such that in all cases, c3+c2==bitwidth(op v c1).
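///
///   For example, with i32 values: (or (mul v 12) (shrl (mul v 3) 30))
///   expands (mul v 12) -> (shl (mul v 3) 2), since 12 == 3 << 2 and
///   2 + 30 == 32.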
5708 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5709                                      SDValue ExtractFrom, SDValue &Mask,
5710                                      const SDLoc &DL) {
5711   assert(OppShift && ExtractFrom && "Empty SDValue");
5712   assert(
5713       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5714       "Existing shift must be valid as a rotate half");
5715 
5716   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5717   // Preconditions:
5718   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5719   //
5720   // Find opcode of the needed shift to be extracted from (op0 v c0).
5721   unsigned Opcode = ISD::DELETED_NODE;
5722   bool IsMulOrDiv = false;
5723   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5724   // opcode or its arithmetic (mul or udiv) variant.
5725   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5726     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5727     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5728       return false;
5729     Opcode = NeededShift;
5730     return true;
5731   };
5732   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5733   // that the needed shift can be extracted from.
5734   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5735       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5736     return SDValue();
5737 
5738   // op0 must be the same opcode on both sides, have the same LHS argument,
5739   // and produce the same value type.
5740   SDValue OppShiftLHS = OppShift.getOperand(0);
5741   EVT ShiftedVT = OppShiftLHS.getValueType();
5742   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5743       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5744       ShiftedVT != ExtractFrom.getValueType())
5745     return SDValue();
5746 
5747   // Amount of the existing shift.
5748   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5749   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5750   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5751   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5752   ConstantSDNode *ExtractFromCst =
5753       isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
5755   // Check that we have constant values.
5756   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5757       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5758       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5759     return SDValue();
5760 
5761   // Compute the shift amount we need to extract to complete the rotate.
5762   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5763   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5764     return SDValue();
5765   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5766   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5767   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5768   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5769   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5770 
5771   // Now try extract the needed shift from the ExtractFrom op and see if the
5772   // result matches up with the existing shift's LHS op.
5773   if (IsMulOrDiv) {
5774     // Op to extract from is a mul or udiv by a constant.
5775     // Check:
5776     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5777     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5778     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5779                                                  NeededShiftAmt.getZExtValue());
5780     APInt ResultAmt;
5781     APInt Rem;
5782     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5783     if (Rem != 0 || ResultAmt != OppLHSAmt)
5784       return SDValue();
5785   } else {
5786     // Op to extract from is a shift by a constant.
5787     // Check:
5788     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5789     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5790                                           ExtractFromAmt.getBitWidth()))
5791       return SDValue();
5792   }
5793 
5794   // Return the expanded shift op that should allow a rotate to be formed.
5795   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5796   EVT ResVT = ExtractFrom.getValueType();
5797   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5798   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5799 }
5800 
5801 // Return true if we can prove that, whenever Neg and Pos are both in the
5802 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5803 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5804 //
5805 //     (or (shift1 X, Neg), (shift2 X, Pos))
5806 //
5807 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5808 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5809 // to consider shift amounts with defined behavior.
5810 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5811                            SelectionDAG &DAG) {
5812   // If EltSize is a power of 2 then:
5813   //
5814   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5815   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5816   //
5817   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5818   // for the stronger condition:
5819   //
5820   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
5821   //
5822   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5823   // we can just replace Neg with Neg' for the rest of the function.
5824   //
5825   // In other cases we check for the even stronger condition:
5826   //
5827   //     Neg == EltSize - Pos                                    [B]
5828   //
5829   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
5830   // behavior if Pos == 0 (and consequently Neg == EltSize).
5831   //
5832   // We could actually use [A] whenever EltSize is a power of 2, but the
5833   // only extra cases that it would match are those uninteresting ones
5834   // where Neg and Pos are never in range at the same time.  E.g. for
5835   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5836   // as well as (sub 32, Pos), but:
5837   //
5838   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5839   //
5840   // always invokes undefined behavior for 32-bit X.
5841   //
5842   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5843   unsigned MaskLoBits = 0;
5844   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5845     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5846       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5847       unsigned Bits = Log2_64(EltSize);
5848       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5849           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5850         Neg = Neg.getOperand(0);
5851         MaskLoBits = Bits;
5852       }
5853     }
5854   }
5855 
5856   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5857   if (Neg.getOpcode() != ISD::SUB)
5858     return false;
5859   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5860   if (!NegC)
5861     return false;
5862   SDValue NegOp1 = Neg.getOperand(1);
5863 
5864   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5865   // Pos'.  The truncation is redundant for the purpose of the equality.
5866   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5867     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5868       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5869       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5870           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5871            MaskLoBits))
5872         Pos = Pos.getOperand(0);
5873     }
5874   }
5875 
5876   // The condition we need is now:
5877   //
5878   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5879   //
5880   // If NegOp1 == Pos then we need:
5881   //
5882   //              EltSize & Mask == NegC & Mask
5883   //
5884   // (because "x & Mask" is a truncation and distributes through subtraction).
5885   APInt Width;
5886   if (Pos == NegOp1)
5887     Width = NegC->getAPIntValue();
5888 
5889   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5890   // Then the condition we want to prove becomes:
5891   //
5892   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5893   //
5894   // which, again because "x & Mask" is a truncation, becomes:
5895   //
5896   //                NegC & Mask == (EltSize - PosC) & Mask
5897   //             EltSize & Mask == (NegC + PosC) & Mask
5898   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5899     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5900       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5901     else
5902       return false;
5903   } else
5904     return false;
5905 
5906   // Now we just need to check that EltSize & Mask == Width & Mask.
5907   if (MaskLoBits)
5908     // EltSize & Mask is 0 since Mask is EltSize - 1.
5909     return Width.getLoBits(MaskLoBits) == 0;
5910   return Width == EltSize;
5911 }
5912 
5913 // A subroutine of MatchRotate used once we have found an OR of two opposite
5914 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
5915 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5916 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
5917 // Neg with outer conversions stripped away.
5918 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5919                                        SDValue Neg, SDValue InnerPos,
5920                                        SDValue InnerNeg, unsigned PosOpcode,
5921                                        unsigned NegOpcode, const SDLoc &DL) {
5922   // fold (or (shl x, (*ext y)),
5923   //          (srl x, (*ext (sub 32, y)))) ->
5924   //   (rotl x, y) or (rotr x, (sub 32, y))
5925   //
5926   // fold (or (shl x, (*ext (sub 32, y))),
5927   //          (srl x, (*ext y))) ->
5928   //   (rotr x, y) or (rotl x, (sub 32, y))
5929   EVT VT = Shifted.getValueType();
5930   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5931     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5932     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5933                        HasPos ? Pos : Neg).getNode();
5934   }
5935 
5936   return nullptr;
5937 }
5938 
5939 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
5940 // idioms for rotate, and if the target supports rotation instructions, generate
5941 // a rot[lr].
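// E.g. (or (shl x, C1), (srl x, C2)) with constant C1 + C2 == bitwidth(x)
// becomes (rotl x, C1) (or, equivalently, (rotr x, C2)).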
5942 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded and promoted types won't work with rotates.
5944   EVT VT = LHS.getValueType();
5945   if (!TLI.isTypeLegal(VT)) return nullptr;
5946 
5947   // The target must have at least one rotate flavor.
5948   bool HasROTL = hasOperation(ISD::ROTL, VT);
5949   bool HasROTR = hasOperation(ISD::ROTR, VT);
5950   if (!HasROTL && !HasROTR) return nullptr;
5951 
5952   // Check for truncated rotate.
5953   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5954       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5955     assert(LHS.getValueType() == RHS.getValueType());
5956     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5957       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5958                          SDValue(Rot, 0)).getNode();
5959     }
5960   }
5961 
5962   // Match "(X shl/srl V1) & V2" where V2 may not be present.
5963   SDValue LHSShift;   // The shift.
5964   SDValue LHSMask;    // AND value if any.
5965   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5966 
5967   SDValue RHSShift;   // The shift.
5968   SDValue RHSMask;    // AND value if any.
5969   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5970 
5971   // If neither side matched a rotate half, bail
5972   if (!LHSShift && !RHSShift)
5973     return nullptr;
5974 
5975   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5976   // side of the rotate, so try to handle that here. In all cases we need to
5977   // pass the matched shift from the opposite side to compute the opcode and
5978   // needed shift amount to extract.  We still want to do this if both sides
5979   // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
5981   // single one).
5982 
5983   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5984   if (LHSShift)
5985     if (SDValue NewRHSShift =
5986             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5987       RHSShift = NewRHSShift;
5988   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5989   if (RHSShift)
5990     if (SDValue NewLHSShift =
5991             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5992       LHSShift = NewLHSShift;
5993 
5994   // If a side is still missing, nothing else we can do.
5995   if (!RHSShift || !LHSShift)
5996     return nullptr;
5997 
5998   // At this point we've matched or extracted a shift op on each side.
5999 
6000   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6001     return nullptr;   // Not shifting the same value.
6002 
6003   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6004     return nullptr;   // Shifts must disagree.
6005 
6006   // Canonicalize shl to left side in a shl/srl pair.
6007   if (RHSShift.getOpcode() == ISD::SHL) {
6008     std::swap(LHS, RHS);
6009     std::swap(LHSShift, RHSShift);
6010     std::swap(LHSMask, RHSMask);
6011   }
6012 
6013   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6014   SDValue LHSShiftArg = LHSShift.getOperand(0);
6015   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6016   SDValue RHSShiftArg = RHSShift.getOperand(0);
6017   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6018 
6019   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6020   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6021   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6022                                         ConstantSDNode *RHS) {
6023     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6024   };
6025   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6026     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6027                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6028 
6029     // If there is an AND of either shifted operand, apply it to the result.
6030     if (LHSMask.getNode() || RHSMask.getNode()) {
6031       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6032       SDValue Mask = AllOnes;
6033 
6034       if (LHSMask.getNode()) {
6035         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6036         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6037                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6038       }
6039       if (RHSMask.getNode()) {
6040         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6041         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6042                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6043       }
6044 
6045       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6046     }
6047 
6048     return Rot.getNode();
6049   }
6050 
6051   // If there is a mask here, and we have a variable shift, we can't be sure
6052   // that we're masking out the right stuff.
6053   if (LHSMask.getNode() || RHSMask.getNode())
6054     return nullptr;
6055 
6056   // If the shift amount is sign/zext/any-extended just peel it off.
6057   SDValue LExtOp0 = LHSShiftAmt;
6058   SDValue RExtOp0 = RHSShiftAmt;
6059   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6060        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6061        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6062        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6063       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6064        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6065        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6066        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6067     LExtOp0 = LHSShiftAmt.getOperand(0);
6068     RExtOp0 = RHSShiftAmt.getOperand(0);
6069   }
6070 
6071   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6072                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6073   if (TryL)
6074     return TryL;
6075 
6076   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6077                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6078   if (TryR)
6079     return TryR;
6080 
6081   return nullptr;
6082 }
6083 
6084 namespace {
6085 
/// Represents the known origin of an individual byte in a load combine
/// pattern. The value of the byte is either constant zero or comes from memory.
6088 struct ByteProvider {
6089   // For constant zero providers Load is set to nullptr. For memory providers
6090   // Load represents the node which loads the byte from memory.
6091   // ByteOffset is the offset of the byte in the value produced by the load.
6092   LoadSDNode *Load = nullptr;
6093   unsigned ByteOffset = 0;
6094 
6095   ByteProvider() = default;
6096 
6097   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6098     return ByteProvider(Load, ByteOffset);
6099   }
6100 
6101   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6102 
6103   bool isConstantZero() const { return !Load; }
6104   bool isMemory() const { return Load; }
6105 
6106   bool operator==(const ByteProvider &Other) const {
6107     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6108   }
6109 
6110 private:
6111   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6112       : Load(Load), ByteOffset(ByteOffset) {}
6113 };
6114 
6115 } // end anonymous namespace
6116 
/// Recursively traverses the expression to calculate the origin of the
/// requested byte of the given value. Returns None if the provider can't be
/// calculated.
///
/// For every value except the root of the expression, verifies that the value
/// has exactly one use; if that does not hold, returns None. This way, if the
/// origin of the byte is returned, it is guaranteed that the values which
/// contribute to the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use, this function iterates over trees, not DAGs, so it never visits the
/// same node more than once.
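///
/// For example, for (or (zext (load i8 p)), (shl (zext (load i8 p+1)), 8)),
/// byte 0 is provided by the load of p and byte 1 by the load of p+1.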
6128 static const Optional<ByteProvider>
6129 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6130                       bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to 8 calls deep.
6132   if (Depth == 10)
6133     return None;
6134 
6135   if (!Root && !Op.hasOneUse())
6136     return None;
6137 
6138   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6139   unsigned BitWidth = Op.getValueSizeInBits();
6140   if (BitWidth % 8 != 0)
6141     return None;
6142   unsigned ByteWidth = BitWidth / 8;
6143   assert(Index < ByteWidth && "invalid index requested");
6144   (void) ByteWidth;
6145 
6146   switch (Op.getOpcode()) {
6147   case ISD::OR: {
6148     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6149     if (!LHS)
6150       return None;
6151     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6152     if (!RHS)
6153       return None;
6154 
6155     if (LHS->isConstantZero())
6156       return RHS;
6157     if (RHS->isConstantZero())
6158       return LHS;
6159     return None;
6160   }
6161   case ISD::SHL: {
6162     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6163     if (!ShiftOp)
6164       return None;
6165 
6166     uint64_t BitShift = ShiftOp->getZExtValue();
6167     if (BitShift % 8 != 0)
6168       return None;
6169     uint64_t ByteShift = BitShift / 8;
6170 
6171     return Index < ByteShift
6172                ? ByteProvider::getConstantZero()
6173                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6174                                        Depth + 1);
6175   }
6176   case ISD::ANY_EXTEND:
6177   case ISD::SIGN_EXTEND:
6178   case ISD::ZERO_EXTEND: {
6179     SDValue NarrowOp = Op->getOperand(0);
6180     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6181     if (NarrowBitWidth % 8 != 0)
6182       return None;
6183     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6184 
6185     if (Index >= NarrowByteWidth)
6186       return Op.getOpcode() == ISD::ZERO_EXTEND
6187                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6188                  : None;
6189     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6190   }
6191   case ISD::BSWAP:
6192     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6193                                  Depth + 1);
6194   case ISD::LOAD: {
6195     auto L = cast<LoadSDNode>(Op.getNode());
6196     if (L->isVolatile() || L->isIndexed())
6197       return None;
6198 
6199     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6200     if (NarrowBitWidth % 8 != 0)
6201       return None;
6202     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6203 
6204     if (Index >= NarrowByteWidth)
6205       return L->getExtensionType() == ISD::ZEXTLOAD
6206                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6207                  : None;
6208     return ByteProvider::getMemory(L, Index);
6209   }
6210   }
6211 
6212   return None;
6213 }
6214 
6215 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6216   return i;
6217 }
6218 
6219 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6220   return BW - i - 1;
6221 }
6222 
// Check if the byte offsets we are looking at match either a big or little
// endian value load. Returns true for big endian, false for little endian,
// and None if the match failed.
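// E.g. with FirstOffset == 0, offsets {0, 1, 2, 3} match a little endian load
// and {3, 2, 1, 0} match a big endian load.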
6226 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6227                                   int64_t FirstOffset) {
  // Endianness can only be determined for values at least 2 bytes wide.
6229   unsigned Width = ByteOffsets.size();
6230   if (Width < 2)
6231     return None;
6232 
6233   bool BigEndian = true, LittleEndian = true;
6234   for (unsigned i = 0; i < Width; i++) {
6235     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6236     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6237     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6238     if (!BigEndian && !LittleEndian)
6239       return None;
6240   }
6241 
  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
6244   return BigEndian;
6245 }
6246 
6247 /// Match a pattern where a wide type scalar value is loaded by several narrow
6248 /// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
6250 ///
6251 /// Assuming little endian target:
6252 ///  i8 *a = ...
6253 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6254 /// =>
6255 ///  i32 val = *((i32)a)
6256 ///
6257 ///  i8 *a = ...
6258 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6259 /// =>
6260 ///  i32 val = BSWAP(*((i32)a))
6261 ///
6262 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6263 /// interact well with the worklist mechanism. When a part of the pattern is
6264 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6265 /// but the root node of the pattern which triggers the load combine is not
6266 /// necessarily a direct user of the changed node. For example, once the address
/// of the t28 load is reassociated, load combine won't be triggered:
6268 ///             t25: i32 = add t4, Constant:i32<2>
6269 ///           t26: i64 = sign_extend t25
6270 ///        t27: i64 = add t2, t26
6271 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6272 ///     t29: i32 = zero_extend t28
6273 ///   t32: i32 = shl t29, Constant:i8<8>
6274 /// t33: i32 = or t23, t32
6275 /// As a possible fix visitLoad can check if the load can be a part of a load
6276 /// combine pattern and add corresponding OR roots to the worklist.
6277 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6278   assert(N->getOpcode() == ISD::OR &&
6279          "Can only match load combining against OR nodes");
6280 
6281   // Handles simple types only
6282   EVT VT = N->getValueType(0);
6283   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6284     return SDValue();
6285   unsigned ByteWidth = VT.getSizeInBits() / 8;
6286 
6287   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalization we can introduce illegal loads that are too wide; they
  // will later be split into legal-sized loads. This enables us to combine
  // i64-load-by-i8 patterns to a couple of i32 loads on 32-bit targets.
6291   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6292     return SDValue();
6293 
6294   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6295   auto MemoryByteOffset = [&] (ByteProvider P) {
6296     assert(P.isMemory() && "Must be a memory byte provider");
6297     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6298     assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
6300     unsigned LoadByteWidth = LoadBitWidth / 8;
6301     return IsBigEndianTarget
6302             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6303             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6304   };
6305 
6306   Optional<BaseIndexOffset> Base;
6307   SDValue Chain;
6308 
6309   SmallPtrSet<LoadSDNode *, 8> Loads;
6310   Optional<ByteProvider> FirstByteProvider;
6311   int64_t FirstOffset = INT64_MAX;
6312 
6313   // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect byte offsets from the Base address in ByteOffsets.
6315   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6316   for (unsigned i = 0; i < ByteWidth; i++) {
6317     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6318     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6319       return SDValue();
6320 
6321     LoadSDNode *L = P->Load;
6322     assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6323            "Must be enforced by calculateByteProvider");
6324     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6325 
6326     // All loads must share the same chain
6327     SDValue LChain = L->getChain();
6328     if (!Chain)
6329       Chain = LChain;
6330     else if (Chain != LChain)
6331       return SDValue();
6332 
6333     // Loads must share the same base address
6334     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6335     int64_t ByteOffsetFromBase = 0;
6336     if (!Base)
6337       Base = Ptr;
6338     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6339       return SDValue();
6340 
6341     // Calculate the offset of the current byte from the base address
6342     ByteOffsetFromBase += MemoryByteOffset(*P);
6343     ByteOffsets[i] = ByteOffsetFromBase;
6344 
6345     // Remember the first byte load
6346     if (ByteOffsetFromBase < FirstOffset) {
6347       FirstByteProvider = P;
6348       FirstOffset = ByteOffsetFromBase;
6349     }
6350 
6351     Loads.insert(L);
6352   }
6353   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6354          "memory, so there must be at least one load which produces the value");
6355   assert(Base && "Base address of the accessed memory location must be set");
6356   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6357 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
6360   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6361   if (!IsBigEndian.hasValue())
6362     return SDValue();
6363 
6364   assert(FirstByteProvider && "must be set");
6365 
  // Ensure that the first byte is loaded from a zero offset of the first load,
  // so the combined value can be loaded from the first load's address.
6368   if (MemoryByteOffset(*FirstByteProvider) != 0)
6369     return SDValue();
6370   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6371 
  // The node we are looking at matches the pattern; check if we can
6373   // replace it with a single load and bswap if needed.
6374 
  // If the load needs a byte swap, check whether the target supports it
6376   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6377 
  // Before legalization we can introduce illegal bswaps which will later be
6379   // converted to an explicit bswap sequence. This way we end up with a single
6380   // load and byte shuffling instead of several loads and byte shuffling.
6381   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6382     return SDValue();
6383 
6384   // Check that a load of the wide type is both allowed and fast on the target
6385   bool Fast = false;
6386   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6387                                         VT, FirstLoad->getAddressSpace(),
6388                                         FirstLoad->getAlignment(), &Fast);
6389   if (!Allowed || !Fast)
6390     return SDValue();
6391 
6392   SDValue NewLoad =
6393       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6394                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6395 
6396   // Transfer chain users from old loads to the new load.
6397   for (LoadSDNode *L : Loads)
6398     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6399 
6400   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6401 }
6402 
6403 // If the target has andn, bsl, or a similar bit-select instruction,
// we want to unfold the masked merge; the canonical pattern is:
6405 //   |        A  |  |B|
6406 //   ((x ^ y) & m) ^ y
6407 //    |  D  |
6408 // Into:
6409 //   (x & m) | (y & ~m)
6410 // If y is a constant, and the 'andn' does not work with immediates,
6411 // we unfold into a different pattern:
6412 //   ~(~x & m) & (m | y)
6413 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6414 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
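//
// E.g. for x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  == (0b0110 & 0b0110) ^ 0b1010 == 0b1100
//   (x & m) | (y & ~m) == 0b0100 | 0b1000            == 0b1100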
6416 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6417   assert(N->getOpcode() == ISD::XOR);
6418 
6419   // Don't touch 'not' (i.e. where y = -1).
6420   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6421     return SDValue();
6422 
6423   EVT VT = N->getValueType(0);
6424 
6425   // There are 3 commutable operators in the pattern,
6426   // so we have to deal with 8 possible variants of the basic pattern.
6427   SDValue X, Y, M;
6428   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6429     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6430       return false;
6431     SDValue Xor = And.getOperand(XorIdx);
6432     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6433       return false;
6434     SDValue Xor0 = Xor.getOperand(0);
6435     SDValue Xor1 = Xor.getOperand(1);
6436     // Don't touch 'not' (i.e. where y = -1).
6437     if (isAllOnesOrAllOnesSplat(Xor1))
6438       return false;
6439     if (Other == Xor0)
6440       std::swap(Xor0, Xor1);
6441     if (Other != Xor1)
6442       return false;
6443     X = Xor0;
6444     Y = Xor1;
6445     M = And.getOperand(XorIdx ? 0 : 1);
6446     return true;
6447   };
6448 
6449   SDValue N0 = N->getOperand(0);
6450   SDValue N1 = N->getOperand(1);
6451   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6452       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6453     return SDValue();
6454 
6455   // Don't do anything if the mask is constant. This should not be reachable.
6456   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
6458   if (isa<ConstantSDNode>(M.getNode()))
6459     return SDValue();
6460 
6461   // We can transform if the target has AndNot
6462   if (!TLI.hasAndNot(M))
6463     return SDValue();
6464 
6465   SDLoc DL(N);
6466 
6467   // If Y is a constant, check that 'andn' works with immediates.
6468   if (!TLI.hasAndNot(Y)) {
6469     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6470     // If not, we need to do a bit more work to make sure andn is still used.
6471     SDValue NotX = DAG.getNOT(DL, X, VT);
6472     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6473     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6474     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6475     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6476   }
6477 
6478   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6479   SDValue NotM = DAG.getNOT(DL, M, VT);
6480   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6481 
6482   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6483 }
6484 
6485 SDValue DAGCombiner::visitXOR(SDNode *N) {
6486   SDValue N0 = N->getOperand(0);
6487   SDValue N1 = N->getOperand(1);
6488   EVT VT = N0.getValueType();
6489 
6490   // fold vector ops
6491   if (VT.isVector()) {
6492     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6493       return FoldedVOp;
6494 
6495     // fold (xor x, 0) -> x, vector edition
6496     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6497       return N1;
6498     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6499       return N0;
6500   }
6501 
6502   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6503   SDLoc DL(N);
6504   if (N0.isUndef() && N1.isUndef())
6505     return DAG.getConstant(0, DL, VT);
6506   // fold (xor x, undef) -> undef
6507   if (N0.isUndef())
6508     return N0;
6509   if (N1.isUndef())
6510     return N1;
6511   // fold (xor c1, c2) -> c1^c2
6512   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6513   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6514   if (N0C && N1C)
6515     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6516   // canonicalize constant to RHS
6517   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6518      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6519     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6520   // fold (xor x, 0) -> x
6521   if (isNullConstant(N1))
6522     return N0;
6523 
6524   if (SDValue NewSel = foldBinOpIntoSelect(N))
6525     return NewSel;
6526 
6527   // reassociate xor
6528   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6529     return RXOR;
6530 
6531   // fold !(x cc y) -> (x !cc y)
6532   unsigned N0Opcode = N0.getOpcode();
6533   SDValue LHS, RHS, CC;
6534   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6535     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6536                                                LHS.getValueType().isInteger());
6537     if (!LegalOperations ||
6538         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6539       switch (N0Opcode) {
6540       default:
6541         llvm_unreachable("Unhandled SetCC Equivalent!");
6542       case ISD::SETCC:
6543         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6544       case ISD::SELECT_CC:
6545         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6546                                N0.getOperand(3), NotCC);
6547       }
6548     }
6549   }
6550 
6551   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6552   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6553       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6554     SDValue V = N0.getOperand(0);
6555     SDLoc DL0(N0);
6556     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6557                     DAG.getConstant(1, DL0, V.getValueType()));
6558     AddToWorklist(V.getNode());
6559     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6560   }
6561 
6562   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6563   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6564       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6565     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6566     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6567       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6568       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6569       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6570       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6571       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6572     }
6573   }
6574   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6575   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6576       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6577     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6578     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6579       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6580       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6581       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6582       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6583       return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6584     }
6585   }
6586   // fold (xor (and x, y), y) -> (and (not x), y)
6587   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6588     SDValue X = N0.getOperand(0);
6589     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6590     AddToWorklist(NotX.getNode());
6591     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6592   }
6593 
6594   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6595     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6596     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6597     unsigned BitWidth = VT.getScalarSizeInBits();
6598     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
6600       // shift has been simplified to undef.
6601       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6602       if (ShiftAmt < BitWidth) {
6603         APInt Ones = APInt::getAllOnesValue(BitWidth);
6604         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6605         if (XorC->getAPIntValue() == Ones) {
6606           // If the xor constant is a shifted -1, do a 'not' before the shift:
6607           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6608           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
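          // A concrete instance (illustrative, i8 operands):
          //   xor (X << 4), 0xF0 --> (not X) << 4
          //   xor (X >> 4), 0x0F --> (not X) >> 4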
6609           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6610           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6611         }
6612       }
6613     }
6614   }
6615 
6616   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6617   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6618     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6619     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6620     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6621       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6622       SDValue S0 = S.getOperand(0);
6623       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6624         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6625         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6626           if (C->getAPIntValue() == (OpSizeInBits - 1))
6627             return DAG.getNode(ISD::ABS, DL, VT, S0);
6628       }
6629     }
6630   }
6631 
6632   // fold (xor x, x) -> 0
6633   if (N0 == N1)
6634     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6635 
6636   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6637   // Here is a concrete example of this equivalence:
6638   // i16   x ==  14
6639   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
6640   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6641   //
6642   // =>
6643   //
6644   // i16     ~1      == 0b1111111111111110
6645   // i16 rol(~1, 14) == 0b1011111111111111
6646   //
6647   // Some additional tips to help conceptualize this transform:
6648   // - Try to see the operation as placing a single zero in a value of all ones.
6649   // - There exists no value for x which would allow the result to contain zero.
6650   // - Values of x larger than the bitwidth are undefined and do not require a
6651   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
6653   // A rotate left of ~1 is a nice way of achieving the desired result.
6654   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6655       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6656     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6657                        N0.getOperand(1));
6658   }
6659 
6660   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
6661   if (N0Opcode == N1.getOpcode())
6662     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6663       return V;
6664 
6665   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
6666   if (SDValue MM = unfoldMaskedMerge(N))
6667     return MM;
6668 
6669   // Simplify the expression using non-local knowledge.
6670   if (SimplifyDemandedBits(SDValue(N, 0)))
6671     return SDValue(N, 0);
6672 
6673   return SDValue();
6674 }
6675 
6676 /// Handle transforms common to the three shifts, when the shift amount is a
6677 /// constant.
6678 /// We are looking for: (shift being one of shl/sra/srl)
6679 ///   shift (binop X, C0), C1
6680 /// And want to transform into:
6681 ///   binop (shift X, C1), (shift C0, C1)
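/// For example (illustrative, shift = shl):
///   shl (add X, 5), 2 --> add (shl X, 2), 20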
6682 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6683   // Do not turn a 'not' into a regular xor.
6684   if (isBitwiseNot(N->getOperand(0)))
6685     return SDValue();
6686 
6687   // The inner binop must be one-use, since we want to replace it.
6688   SDNode *LHS = N->getOperand(0).getNode();
6689   if (!LHS->hasOneUse()) return SDValue();
6690 
6691   // We want to pull some binops through shifts, so that we have (and (shift))
6692   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
6693   // thing happens with address calculations, so it's important to canonicalize
6694   // it.
6695   switch (LHS->getOpcode()) {
6696   default:
6697     return SDValue();
6698   case ISD::OR:
6699   case ISD::XOR:
6700   case ISD::AND:
6701     break;
6702   case ISD::ADD:
6703     if (N->getOpcode() != ISD::SHL)
6704       return SDValue(); // only shl(add) not sr[al](add).
6705     break;
6706   }
6707 
6708   // We require the RHS of the binop to be a constant and not opaque as well.
6709   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6710   if (!BinOpCst)
6711     return SDValue();
6712 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is a copy/select. Enable this in other cases when we figure out that it
  // is exactly profitable.
6716   SDValue BinOpLHSVal = LHS->getOperand(0);
6717   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
6718                             BinOpLHSVal.getOpcode() == ISD::SRA ||
6719                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
6720                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
6721   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
6722                         BinOpLHSVal.getOpcode() == ISD::SELECT;
6723 
6724   if (!IsShiftByConstant && !IsCopyOrSelect)
6725     return SDValue();
6726 
6727   if (IsCopyOrSelect && N->hasOneUse())
6728     return SDValue();
6729 
6730   EVT VT = N->getValueType(0);
6731 
6732   if (!TLI.isDesirableToCommuteWithShift(N, Level))
6733     return SDValue();
6734 
6735   // Fold the constants, shifting the binop RHS by the shift amount.
6736   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6737                                N->getValueType(0),
6738                                LHS->getOperand(1), N->getOperand(1));
6739   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6740 
6741   // Create the new shift.
6742   SDValue NewShift = DAG.getNode(N->getOpcode(),
6743                                  SDLoc(LHS->getOperand(0)),
6744                                  VT, LHS->getOperand(0), N->getOperand(1));
6745 
6746   // Create the new binop.
6747   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6748 }
6749 
6750 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6751   assert(N->getOpcode() == ISD::TRUNCATE);
6752   assert(N->getOperand(0).getOpcode() == ISD::AND);
6753 
6754   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
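  // For instance (illustrative, i32 -> i8):
  //   (truncate:i8 (and:i32 X, 0x1FF)) -> (and:i8 (truncate:i8 X), 0xFF)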
6755   EVT TruncVT = N->getValueType(0);
6756   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
6757       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
6758     SDValue N01 = N->getOperand(0).getOperand(1);
6759     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6760       SDLoc DL(N);
6761       SDValue N00 = N->getOperand(0).getOperand(0);
6762       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6763       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6764       AddToWorklist(Trunc00.getNode());
6765       AddToWorklist(Trunc01.getNode());
6766       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6767     }
6768   }
6769 
6770   return SDValue();
6771 }
6772 
6773 SDValue DAGCombiner::visitRotate(SDNode *N) {
6774   SDLoc dl(N);
6775   SDValue N0 = N->getOperand(0);
6776   SDValue N1 = N->getOperand(1);
6777   EVT VT = N->getValueType(0);
6778   unsigned Bitsize = VT.getScalarSizeInBits();
6779 
6780   // fold (rot x, 0) -> x
6781   if (isNullOrNullSplat(N1))
6782     return N0;
6783 
6784   // fold (rot x, c) -> x iff (c % BitSize) == 0
6785   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6786     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6787     if (DAG.MaskedValueIsZero(N1, ModuloMask))
6788       return N0;
6789   }
6790 
6791   // fold (rot x, c) -> (rot x, c % BitSize)
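  // For instance (illustrative, i8): (rotl x, 11) -> (rotl x, 3), since
  // 11 % 8 == 3.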
6792   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6793     if (Cst->getAPIntValue().uge(Bitsize)) {
6794       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6795       return DAG.getNode(N->getOpcode(), dl, VT, N0,
6796                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
6797     }
6798   }
6799 
6800   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6801   if (N1.getOpcode() == ISD::TRUNCATE &&
6802       N1.getOperand(0).getOpcode() == ISD::AND) {
6803     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6804       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6805   }
6806 
6807   unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
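  // For instance (illustrative, i8): (rotl (rotl x, 2), 3) combines to
  // (rotl x, 5).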
6809   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6810     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6811     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6812     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6813       EVT ShiftVT = C1->getValueType(0);
6814       bool SameSide = (N->getOpcode() == NextOp);
6815       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6816       if (SDValue CombinedShift =
6817               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6818         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6819         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6820             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6821             BitsizeC.getNode());
6822         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6823                            CombinedShiftNorm);
6824       }
6825     }
6826   }
6827   return SDValue();
6828 }
6829 
6830 SDValue DAGCombiner::visitSHL(SDNode *N) {
6831   SDValue N0 = N->getOperand(0);
6832   SDValue N1 = N->getOperand(1);
6833   if (SDValue V = DAG.simplifyShift(N0, N1))
6834     return V;
6835 
6836   EVT VT = N0.getValueType();
6837   unsigned OpSizeInBits = VT.getScalarSizeInBits();
6838 
6839   // fold vector ops
6840   if (VT.isVector()) {
6841     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6842       return FoldedVOp;
6843 
6844     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces an all-ones true value then:
6846     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6847     if (N1CV && N1CV->isConstant()) {
6848       if (N0.getOpcode() == ISD::AND) {
6849         SDValue N00 = N0->getOperand(0);
6850         SDValue N01 = N0->getOperand(1);
6851         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6852 
6853         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6854             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6855                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6856           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6857                                                      N01CV, N1CV))
6858             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6859         }
6860       }
6861     }
6862   }
6863 
6864   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6865 
6866   // fold (shl c1, c2) -> c1<<c2
6867   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6868   if (N0C && N1C && !N1C->isOpaque())
6869     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6870 
6871   if (SDValue NewSel = foldBinOpIntoSelect(N))
6872     return NewSel;
6873 
6874   // if (shl x, c) is known to be zero, return 0
6875   if (DAG.MaskedValueIsZero(SDValue(N, 0),
6876                             APInt::getAllOnesValue(OpSizeInBits)))
6877     return DAG.getConstant(0, SDLoc(N), VT);
6878   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6879   if (N1.getOpcode() == ISD::TRUNCATE &&
6880       N1.getOperand(0).getOpcode() == ISD::AND) {
6881     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6882       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6883   }
6884 
6885   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6886     return SDValue(N, 0);
6887 
6888   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
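  // For instance (illustrative, i8): (shl (shl x, 3), 2) -> (shl x, 5), while
  // (shl (shl x, 6), 3) -> 0 because every bit is shifted out.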
6889   if (N0.getOpcode() == ISD::SHL) {
6890     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6891                                           ConstantSDNode *RHS) {
6892       APInt c1 = LHS->getAPIntValue();
6893       APInt c2 = RHS->getAPIntValue();
6894       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6895       return (c1 + c2).uge(OpSizeInBits);
6896     };
6897     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6898       return DAG.getConstant(0, SDLoc(N), VT);
6899 
6900     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6901                                        ConstantSDNode *RHS) {
6902       APInt c1 = LHS->getAPIntValue();
6903       APInt c2 = RHS->getAPIntValue();
6904       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6905       return (c1 + c2).ult(OpSizeInBits);
6906     };
6907     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6908       SDLoc DL(N);
6909       EVT ShiftVT = N1.getValueType();
6910       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6911       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6912     }
6913   }
6914 
6915   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6916   // For this to be valid, the second form must not preserve any of the bits
6917   // that are shifted out by the inner shift in the first form.  This means
6918   // the outer shift size must be >= the number of bits added by the ext.
6919   // As a corollary, we don't care what kind of ext it is.
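  // For instance (illustrative): (shl (zext:i32 (shl:i16 x, 12)), 16) ->
  // (shl (zext:i32 x), 28); both forms keep only bits [3:0] of x, placed at
  // bits [31:28] of the result.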
6920   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6921               N0.getOpcode() == ISD::ANY_EXTEND ||
6922               N0.getOpcode() == ISD::SIGN_EXTEND) &&
6923       N0.getOperand(0).getOpcode() == ISD::SHL) {
6924     SDValue N0Op0 = N0.getOperand(0);
6925     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6926       APInt c1 = N0Op0C1->getAPIntValue();
6927       APInt c2 = N1C->getAPIntValue();
6928       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6929 
6930       EVT InnerShiftVT = N0Op0.getValueType();
6931       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6932       if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6933         SDLoc DL(N0);
6934         APInt Sum = c1 + c2;
6935         if (Sum.uge(OpSizeInBits))
6936           return DAG.getConstant(0, DL, VT);
6937 
6938         return DAG.getNode(
6939             ISD::SHL, DL, VT,
6940             DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6941             DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6942       }
6943     }
6944   }
6945 
6946   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6947   // Only fold this if the inner zext has no other uses to avoid increasing
6948   // the total number of instructions.
6949   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6950       N0.getOperand(0).getOpcode() == ISD::SRL) {
6951     SDValue N0Op0 = N0.getOperand(0);
6952     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6953       if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6954         uint64_t c1 = N0Op0C1->getZExtValue();
6955         uint64_t c2 = N1C->getZExtValue();
6956         if (c1 == c2) {
6957           SDValue NewOp0 = N0.getOperand(0);
6958           EVT CountVT = NewOp0.getOperand(1).getValueType();
6959           SDLoc DL(N);
6960           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6961                                        NewOp0,
6962                                        DAG.getConstant(c2, DL, CountVT));
6963           AddToWorklist(NewSHL.getNode());
6964           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6965         }
6966       }
6967     }
6968   }
6969 
6970   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
6971   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
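  // For instance (illustrative): (shl (srl exact X, 3), 5) -> (shl X, 2), and
  // (shl (srl exact X, 5), 3) -> (srl X, 2).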
6972   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6973       N0->getFlags().hasExact()) {
6974     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6975       uint64_t C1 = N0C1->getZExtValue();
6976       uint64_t C2 = N1C->getZExtValue();
6977       SDLoc DL(N);
6978       if (C1 <= C2)
6979         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6980                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6981       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6982                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6983     }
6984   }
6985 
  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
6988   // Only fold this if the inner shift has no other uses -- if it does, folding
6989   // this will increase the total number of instructions.
6990   // TODO - drop hasOneUse requirement if c1 == c2?
6991   // TODO - support non-uniform vector shift amounts.
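  // For instance (illustrative, i8): (shl (srl x, 3), 5) ->
  // (and (shl x, 2), 0xE0).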
6992   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6993       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
6994     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6995       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
6996         uint64_t c1 = N0C1->getZExtValue();
6997         uint64_t c2 = N1C->getZExtValue();
6998         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6999         SDValue Shift;
7000         if (c2 > c1) {
7001           Mask <<= c2 - c1;
7002           SDLoc DL(N);
7003           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7004                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
7005         } else {
7006           Mask.lshrInPlace(c1 - c2);
7007           SDLoc DL(N);
7008           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7009                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
7010         }
7011         SDLoc DL(N0);
7012         return DAG.getNode(ISD::AND, DL, VT, Shift,
7013                            DAG.getConstant(Mask, DL, VT));
7014       }
7015     }
7016   }
7017 
7018   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
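  // For instance (illustrative, i8): (shl (sra x, 3), 3) -> (and x, 0xF8).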
7019   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7020       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7021     SDLoc DL(N);
7022     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7023     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7024     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7025   }
7026 
7027   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7028   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7029   // Variant of version done on multiply, except mul by a power of 2 is turned
7030   // into a shift.
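  // For instance (illustrative): (shl (or x, 3), 4) -> (or (shl x, 4), 48).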
7031   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7032       N0.getNode()->hasOneUse() &&
7033       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7034       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7035       TLI.isDesirableToCommuteWithShift(N, Level)) {
7036     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7037     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7038     AddToWorklist(Shl0.getNode());
7039     AddToWorklist(Shl1.getNode());
7040     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7041   }
7042 
7043   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
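  // For instance (illustrative): (shl (mul x, 5), 2) -> (mul x, 20).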
7044   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7045       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7046       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7047     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7048     if (isConstantOrConstantVector(Shl))
7049       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7050   }
7051 
7052   if (N1C && !N1C->isOpaque())
7053     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
7054       return NewSHL;
7055 
7056   return SDValue();
7057 }
7058 
7059 SDValue DAGCombiner::visitSRA(SDNode *N) {
7060   SDValue N0 = N->getOperand(0);
7061   SDValue N1 = N->getOperand(1);
7062   if (SDValue V = DAG.simplifyShift(N0, N1))
7063     return V;
7064 
7065   EVT VT = N0.getValueType();
7066   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7067 
7068   // Arithmetic shifting an all-sign-bit value is a no-op.
7069   // fold (sra 0, x) -> 0
7070   // fold (sra -1, x) -> -1
7071   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7072     return N0;
7073 
7074   // fold vector ops
7075   if (VT.isVector())
7076     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7077       return FoldedVOp;
7078 
7079   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7080 
  // fold (sra c1, c2) -> (c1 >>s c2)
7082   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7083   if (N0C && N1C && !N1C->isOpaque())
7084     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7085 
7086   if (SDValue NewSel = foldBinOpIntoSelect(N))
7087     return NewSel;
7088 
  // fold (sra (shl x, c1), c1) -> sext_inreg if the target supports a
  // sext_inreg of the corresponding narrower type.
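  // For instance (illustrative, i32): (sra (shl x, 24), 24) becomes
  // (sext_inreg x, i8).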
7091   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7092     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7093     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7094     if (VT.isVector())
7095       ExtVT = EVT::getVectorVT(*DAG.getContext(),
7096                                ExtVT, VT.getVectorNumElements());
7097     if ((!LegalOperations ||
7098          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7099       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7100                          N0.getOperand(0), DAG.getValueType(ExtVT));
7101   }
7102 
7103   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7104   // clamp (add c1, c2) to max shift.
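  // For instance (illustrative, i8): (sra (sra x, 3), 2) -> (sra x, 5), and
  // (sra (sra x, 4), 6) -> (sra x, 7) after clamping.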
7105   if (N0.getOpcode() == ISD::SRA) {
7106     SDLoc DL(N);
7107     EVT ShiftVT = N1.getValueType();
7108     EVT ShiftSVT = ShiftVT.getScalarType();
7109     SmallVector<SDValue, 16> ShiftValues;
7110 
7111     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7112       APInt c1 = LHS->getAPIntValue();
7113       APInt c2 = RHS->getAPIntValue();
7114       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7115       APInt Sum = c1 + c2;
7116       unsigned ShiftSum =
7117           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7118       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7119       return true;
7120     };
7121     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7122       SDValue ShiftValue;
7123       if (VT.isVector())
7124         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7125       else
7126         ShiftValue = ShiftValues[0];
7127       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7128     }
7129   }
7130 
7131   // fold (sra (shl X, m), (sub result_size, n))
7132   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7133   // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in
  // better code.
7136   if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
7138     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7139     if (N01C) {
7140       LLVMContext &Ctx = *DAG.getContext();
7141       // Determine what the truncate's result bitsize and type would be.
7142       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7143 
7144       if (VT.isVector())
7145         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7146 
7147       // Determine the residual right-shift amount.
7148       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7149 
7150       // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the type truncated to is legal, sign_extend is legal
7152       // on that type, and the truncate to that type is both legal and free,
7153       // perform the transform.
7154       if ((ShiftAmt > 0) &&
7155           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7156           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7157           TLI.isTruncateFree(VT, TruncVT)) {
7158         SDLoc DL(N);
7159         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7160             getShiftAmountTy(N0.getOperand(0).getValueType()));
7161         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7162                                     N0.getOperand(0), Amt);
7163         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7164                                     Shift);
7165         return DAG.getNode(ISD::SIGN_EXTEND, DL,
7166                            N->getValueType(0), Trunc);
7167       }
7168     }
7169   }
7170 
7171   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7172   if (N1.getOpcode() == ISD::TRUNCATE &&
7173       N1.getOperand(0).getOpcode() == ISD::AND) {
7174     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7175       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7176   }
7177 
7178   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7179   //      if c1 is equal to the number of bits the trunc removes
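  // For instance (illustrative): (sra (trunc:i32 (srl x:i64, 32)), 5) ->
  // (trunc:i32 (sra x, 37)), since the trunc removes exactly 32 bits.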
7180   if (N0.getOpcode() == ISD::TRUNCATE &&
7181       (N0.getOperand(0).getOpcode() == ISD::SRL ||
7182        N0.getOperand(0).getOpcode() == ISD::SRA) &&
7183       N0.getOperand(0).hasOneUse() &&
7184       N0.getOperand(0).getOperand(1).hasOneUse() &&
7185       N1C) {
7186     SDValue N0Op0 = N0.getOperand(0);
7187     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7188       unsigned LargeShiftVal = LargeShift->getZExtValue();
7189       EVT LargeVT = N0Op0.getValueType();
7190 
7191       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
7192         SDLoc DL(N);
7193         SDValue Amt =
7194           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
7195                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
7196         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
7197                                   N0Op0.getOperand(0), Amt);
7198         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7199       }
7200     }
7201   }
7202 
7203   // Simplify, based on bits shifted out of the LHS.
7204   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7205     return SDValue(N, 0);
7206 
7207   // If the sign bit is known to be zero, switch this to a SRL.
7208   if (DAG.SignBitIsZero(N0))
7209     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7210 
7211   if (N1C && !N1C->isOpaque())
7212     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
7213       return NewSRA;
7214 
7215   return SDValue();
7216 }
7217 
7218 SDValue DAGCombiner::visitSRL(SDNode *N) {
7219   SDValue N0 = N->getOperand(0);
7220   SDValue N1 = N->getOperand(1);
7221   if (SDValue V = DAG.simplifyShift(N0, N1))
7222     return V;
7223 
7224   EVT VT = N0.getValueType();
7225   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7226 
7227   // fold vector ops
7228   if (VT.isVector())
7229     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7230       return FoldedVOp;
7231 
7232   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7233 
7234   // fold (srl c1, c2) -> c1 >>u c2
7235   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7236   if (N0C && N1C && !N1C->isOpaque())
7237     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7238 
7239   if (SDValue NewSel = foldBinOpIntoSelect(N))
7240     return NewSel;
7241 
7242   // if (srl x, c) is known to be zero, return 0
7243   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7244                                    APInt::getAllOnesValue(OpSizeInBits)))
7245     return DAG.getConstant(0, SDLoc(N), VT);
7246 
7247   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
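  // For instance (illustrative, i8): (srl (srl x, 3), 2) -> (srl x, 5), while
  // (srl (srl x, 6), 3) -> 0.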
7248   if (N0.getOpcode() == ISD::SRL) {
7249     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7250                                           ConstantSDNode *RHS) {
7251       APInt c1 = LHS->getAPIntValue();
7252       APInt c2 = RHS->getAPIntValue();
7253       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7254       return (c1 + c2).uge(OpSizeInBits);
7255     };
7256     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7257       return DAG.getConstant(0, SDLoc(N), VT);
7258 
7259     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7260                                        ConstantSDNode *RHS) {
7261       APInt c1 = LHS->getAPIntValue();
7262       APInt c2 = RHS->getAPIntValue();
7263       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7264       return (c1 + c2).ult(OpSizeInBits);
7265     };
7266     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7267       SDLoc DL(N);
7268       EVT ShiftVT = N1.getValueType();
7269       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7270       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7271     }
7272   }
7273 
7274   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
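  // For instance (illustrative): (srl (trunc:i32 (srl x:i64, 32)), 8) ->
  // (trunc:i32 (srl x, 40)).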
7275   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7276       N0.getOperand(0).getOpcode() == ISD::SRL) {
7277     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7278       uint64_t c1 = N001C->getZExtValue();
7279       uint64_t c2 = N1C->getZExtValue();
7280       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7281       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7282       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
7284       if (c1 + OpSizeInBits == InnerShiftSize) {
7285         SDLoc DL(N0);
7286         if (c1 + c2 >= InnerShiftSize)
7287           return DAG.getConstant(0, DL, VT);
7288         return DAG.getNode(ISD::TRUNCATE, DL, VT,
7289                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7290                                        N0.getOperand(0).getOperand(0),
7291                                        DAG.getConstant(c1 + c2, DL,
7292                                                        ShiftCountVT)));
7293       }
7294     }
7295   }
7296 
7297   // fold (srl (shl x, c), c) -> (and x, cst2)
7298   // TODO - (srl (shl x, c1), c2).
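  // For instance (illustrative, i8): (srl (shl x, 3), 3) -> (and x, 0x1F).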
7299   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7300       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7301     SDLoc DL(N);
7302     SDValue Mask =
7303         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7304     AddToWorklist(Mask.getNode());
7305     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7306   }
7307 
7308   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7309   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7310     // Shifting in all undef bits?
7311     EVT SmallVT = N0.getOperand(0).getValueType();
7312     unsigned BitSize = SmallVT.getScalarSizeInBits();
7313     if (N1C->getZExtValue() >= BitSize)
7314       return DAG.getUNDEF(VT);
7315 
7316     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7317       uint64_t ShiftAmt = N1C->getZExtValue();
7318       SDLoc DL0(N0);
7319       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7320                                        N0.getOperand(0),
7321                           DAG.getConstant(ShiftAmt, DL0,
7322                                           getShiftAmountTy(SmallVT)));
7323       AddToWorklist(SmallShift.getNode());
7324       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7325       SDLoc DL(N);
7326       return DAG.getNode(ISD::AND, DL, VT,
7327                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7328                          DAG.getConstant(Mask, DL, VT));
7329     }
7330   }
7331 
7332   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
7333   // bit, which is unmodified by sra.
7334   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
7335     if (N0.getOpcode() == ISD::SRA)
7336       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7337   }
7338 
  // fold (srl (ctlz x), "5") where "5" is log2 of the bitwidth: the shift
  // result is 1 iff x == 0, so try to simplify based on what is known about x.
7340   if (N1C && N0.getOpcode() == ISD::CTLZ &&
7341       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7342     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7343 
7344     // If any of the input bits are KnownOne, then the input couldn't be all
7345     // zeros, thus the result of the srl will always be zero.
7346     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7347 
    // If all of the bits input to the ctlz node are known to be zero, then
7349     // the result of the ctlz is "32" and the result of the shift is one.
7350     APInt UnknownBits = ~Known.Zero;
7351     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7352 
7353     // Otherwise, check to see if there is exactly one bit input to the ctlz.
7354     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
7356       // could be set on input to the CTLZ node. If this bit is set, the SRL
7357       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7358       // to an SRL/XOR pair, which is likely to simplify more.
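      // For instance (illustrative): if only bit 3 of the input may be set,
      // the replacement is (xor (srl x, 3), 1).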
7359       unsigned ShAmt = UnknownBits.countTrailingZeros();
7360       SDValue Op = N0.getOperand(0);
7361 
7362       if (ShAmt) {
7363         SDLoc DL(N0);
7364         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7365                   DAG.getConstant(ShAmt, DL,
7366                                   getShiftAmountTy(Op.getValueType())));
7367         AddToWorklist(Op.getNode());
7368       }
7369 
7370       SDLoc DL(N);
7371       return DAG.getNode(ISD::XOR, DL, VT,
7372                          Op, DAG.getConstant(1, DL, VT));
7373     }
7374   }
7375 
7376   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7377   if (N1.getOpcode() == ISD::TRUNCATE &&
7378       N1.getOperand(0).getOpcode() == ISD::AND) {
7379     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7380       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7381   }
7382 
7383   // fold operands of srl based on knowledge that the low bits are not
7384   // demanded.
7385   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7386     return SDValue(N, 0);
7387 
7388   if (N1C && !N1C->isOpaque())
7389     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7390       return NewSRL;
7391 
7392   // Attempt to convert a srl of a load into a narrower zero-extending load.
7393   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7394     return NarrowLoad;
7395 
7396   // Here is a common situation. We want to optimize:
7397   //
7398   //   %a = ...
7399   //   %b = and i32 %a, 2
7400   //   %c = srl i32 %b, 1
7401   //   brcond i32 %c ...
7402   //
7403   // into
7404   //
7405   //   %a = ...
7406   //   %b = and %a, 2
7407   //   %c = setcc eq %b, 0
7408   //   brcond %c ...
7409   //
  // However, after the source operand of the SRL is optimized into an AND, the
  // SRL itself may not be optimized further. Look for the BRCOND user and add
  // it to the worklist.
7413   if (N->hasOneUse()) {
7414     SDNode *Use = *N->use_begin();
7415     if (Use->getOpcode() == ISD::BRCOND)
7416       AddToWorklist(Use);
7417     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
7419       Use = *Use->use_begin();
7420       if (Use->getOpcode() == ISD::BRCOND)
7421         AddToWorklist(Use);
7422     }
7423   }
7424 
7425   return SDValue();
7426 }
7427 
7428 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7429   EVT VT = N->getValueType(0);
7430   SDValue N0 = N->getOperand(0);
7431   SDValue N1 = N->getOperand(1);
7432   SDValue N2 = N->getOperand(2);
7433   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7434   unsigned BitWidth = VT.getScalarSizeInBits();
7435 
7436   // fold (fshl N0, N1, 0) -> N0
7437   // fold (fshr N0, N1, 0) -> N1
7438   if (isPowerOf2_32(BitWidth))
7439     if (DAG.MaskedValueIsZero(
7440             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7441       return IsFSHL ? N0 : N1;
7442 
7443   auto IsUndefOrZero = [](SDValue V) {
7444     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7445   };
7446 
7447   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7448     EVT ShAmtTy = N2.getValueType();
7449 
7450     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7451     if (Cst->getAPIntValue().uge(BitWidth)) {
7452       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7453       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7454                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7455     }
7456 
7457     unsigned ShAmt = Cst->getZExtValue();
7458     if (ShAmt == 0)
7459       return IsFSHL ? N0 : N1;
7460 
7461     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7462     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7463     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7464     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
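    // For instance (illustrative, i8): fshl(0, N1, 3) -> (srl N1, 5), since
    // fshl(x, y, c) == (x << c) | (y >> (8 - c)) for non-zero c.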
7465     if (IsUndefOrZero(N0))
7466       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7467                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7468                                          SDLoc(N), ShAmtTy));
7469     if (IsUndefOrZero(N1))
7470       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7471                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7472                                          SDLoc(N), ShAmtTy));
7473   }
7474 
7475   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7476   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
7478   // TODO: when is it worth doing SUB(BW, N2) as well?
7479   if (isPowerOf2_32(BitWidth)) {
7480     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7481     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7482       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7483     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7484       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7485   }
7486 
7487   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7488   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the funnel
  // shift is legal as well, we might be better off avoiding the non-constant
  // (BW - N2).
7491   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7492   if (N0 == N1 && hasOperation(RotOpc, VT))
7493     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7494 
7495   // Simplify, based on bits shifted out of N0/N1.
7496   if (SimplifyDemandedBits(SDValue(N, 0)))
7497     return SDValue(N, 0);
7498 
7499   return SDValue();
7500 }
7501 
7502 SDValue DAGCombiner::visitABS(SDNode *N) {
7503   SDValue N0 = N->getOperand(0);
7504   EVT VT = N->getValueType(0);
7505 
7506   // fold (abs c1) -> c2
7507   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7508     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7509   // fold (abs (abs x)) -> (abs x)
7510   if (N0.getOpcode() == ISD::ABS)
7511     return N0;
7512   // fold (abs x) -> x iff not-negative
7513   if (DAG.SignBitIsZero(N0))
7514     return N0;
7515   return SDValue();
7516 }
7517 
7518 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7519   SDValue N0 = N->getOperand(0);
7520   EVT VT = N->getValueType(0);
7521 
7522   // fold (bswap c1) -> c2
7523   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7524     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7525   // fold (bswap (bswap x)) -> x
7526   if (N0.getOpcode() == ISD::BSWAP)
7527     return N0->getOperand(0);
7528   return SDValue();
7529 }
7530 
7531 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7532   SDValue N0 = N->getOperand(0);
7533   EVT VT = N->getValueType(0);
7534 
7535   // fold (bitreverse c1) -> c2
7536   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7537     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7538   // fold (bitreverse (bitreverse x)) -> x
7539   if (N0.getOpcode() == ISD::BITREVERSE)
7540     return N0.getOperand(0);
7541   return SDValue();
7542 }
7543 
7544 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7545   SDValue N0 = N->getOperand(0);
7546   EVT VT = N->getValueType(0);
7547 
7548   // fold (ctlz c1) -> c2
7549   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7550     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7551 
7552   // If the value is known never to be zero, switch to the undef version.
7553   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7554     if (DAG.isKnownNeverZero(N0))
7555       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7556   }
7557 
7558   return SDValue();
7559 }
7560 
7561 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7562   SDValue N0 = N->getOperand(0);
7563   EVT VT = N->getValueType(0);
7564 
7565   // fold (ctlz_zero_undef c1) -> c2
7566   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7567     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7568   return SDValue();
7569 }
7570 
7571 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7572   SDValue N0 = N->getOperand(0);
7573   EVT VT = N->getValueType(0);
7574 
7575   // fold (cttz c1) -> c2
7576   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7577     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7578 
7579   // If the value is known never to be zero, switch to the undef version.
7580   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7581     if (DAG.isKnownNeverZero(N0))
7582       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7583   }
7584 
7585   return SDValue();
7586 }
7587 
7588 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7589   SDValue N0 = N->getOperand(0);
7590   EVT VT = N->getValueType(0);
7591 
7592   // fold (cttz_zero_undef c1) -> c2
7593   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7594     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7595   return SDValue();
7596 }
7597 
7598 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7599   SDValue N0 = N->getOperand(0);
7600   EVT VT = N->getValueType(0);
7601 
7602   // fold (ctpop c1) -> c2
7603   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7604     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7605   return SDValue();
7606 }
7607 
7608 // FIXME: This should be checking for no signed zeros on individual operands, as
7609 // well as no nans.
7610 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
7611                                          SDValue RHS,
7612                                          const TargetLowering &TLI) {
7613   const TargetOptions &Options = DAG.getTarget().Options;
7614   EVT VT = LHS.getValueType();
7615 
7616   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7617          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
7618          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7619 }
7620 
7621 /// Generate Min/Max node
7622 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7623                                    SDValue RHS, SDValue True, SDValue False,
7624                                    ISD::CondCode CC, const TargetLowering &TLI,
7625                                    SelectionDAG &DAG) {
7626   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7627     return SDValue();
7628 
7629   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7630   switch (CC) {
7631   case ISD::SETOLT:
7632   case ISD::SETOLE:
7633   case ISD::SETLT:
7634   case ISD::SETLE:
7635   case ISD::SETULT:
7636   case ISD::SETULE: {
    // Since the operands are already known never to be NaN here, either
    // fminnum or fminnum_ieee is OK. Try the IEEE version first, since
    // fminnum is expanded in terms of it.
7640     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7641     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7642       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7643 
7644     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7645     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7646       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7647     return SDValue();
7648   }
7649   case ISD::SETOGT:
7650   case ISD::SETOGE:
7651   case ISD::SETGT:
7652   case ISD::SETGE:
7653   case ISD::SETUGT:
7654   case ISD::SETUGE: {
7655     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7656     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7657       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7658 
7659     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7660     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7661       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7662     return SDValue();
7663   }
7664   default:
7665     return SDValue();
7666   }
7667 }
7668 
7669 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7670   SDValue Cond = N->getOperand(0);
7671   SDValue N1 = N->getOperand(1);
7672   SDValue N2 = N->getOperand(2);
7673   EVT VT = N->getValueType(0);
7674   EVT CondVT = Cond.getValueType();
7675   SDLoc DL(N);
7676 
7677   if (!VT.isInteger())
7678     return SDValue();
7679 
7680   auto *C1 = dyn_cast<ConstantSDNode>(N1);
7681   auto *C2 = dyn_cast<ConstantSDNode>(N2);
7682   if (!C1 || !C2)
7683     return SDValue();
7684 
7685   // Only do this before legalization to avoid conflicting with target-specific
7686   // transforms in the other direction (create a select from a zext/sext). There
7687   // is also a target-independent combine here in DAGCombiner in the other
7688   // direction for (select Cond, -1, 0) when the condition is not i1.
7689   if (CondVT == MVT::i1 && !LegalOperations) {
7690     if (C1->isNullValue() && C2->isOne()) {
7691       // select Cond, 0, 1 --> zext (!Cond)
7692       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7693       if (VT != MVT::i1)
7694         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7695       return NotCond;
7696     }
7697     if (C1->isNullValue() && C2->isAllOnesValue()) {
7698       // select Cond, 0, -1 --> sext (!Cond)
7699       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7700       if (VT != MVT::i1)
7701         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7702       return NotCond;
7703     }
7704     if (C1->isOne() && C2->isNullValue()) {
7705       // select Cond, 1, 0 --> zext (Cond)
7706       if (VT != MVT::i1)
7707         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7708       return Cond;
7709     }
7710     if (C1->isAllOnesValue() && C2->isNullValue()) {
7711       // select Cond, -1, 0 --> sext (Cond)
7712       if (VT != MVT::i1)
7713         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7714       return Cond;
7715     }
7716 
7717     // For any constants that differ by 1, we can transform the select into an
7718     // extend and add. Use a target hook because some targets may prefer to
7719     // transform in the other direction.
7720     if (TLI.convertSelectOfConstantsToMath(VT)) {
7721       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7722         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7723         if (VT != MVT::i1)
7724           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7725         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7726       }
7727       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7728         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7729         if (VT != MVT::i1)
7730           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7731         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7732       }
7733     }
7734 
7735     return SDValue();
7736   }
7737 
7738   // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer-based booleans have different contents
  // from floating-point-based booleans. This is because we can't tell whether we
7741   // have an integer-based boolean or a floating-point-based boolean unless we
7742   // can find the SETCC that produced it and inspect its operands. This is
7743   // fairly easy if C is the SETCC node, but it can potentially be
7744   // undiscoverable (or not reasonably discoverable). For example, it could be
7745   // in another basic block or it could require searching a complicated
7746   // expression.
7747   if (CondVT.isInteger() &&
7748       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7749           TargetLowering::ZeroOrOneBooleanContent &&
7750       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7751           TargetLowering::ZeroOrOneBooleanContent &&
7752       C1->isNullValue() && C2->isOne()) {
7753     SDValue NotCond =
7754         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7755     if (VT.bitsEq(CondVT))
7756       return NotCond;
7757     return DAG.getZExtOrTrunc(NotCond, DL, VT);
7758   }
7759 
7760   return SDValue();
7761 }
7762 
7763 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7764   SDValue N0 = N->getOperand(0);
7765   SDValue N1 = N->getOperand(1);
7766   SDValue N2 = N->getOperand(2);
7767   EVT VT = N->getValueType(0);
7768   EVT VT0 = N0.getValueType();
7769   SDLoc DL(N);
7770 
7771   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7772     return V;
7773 
7774   // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
7776   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7777     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7778 
7779   if (SDValue V = foldSelectOfConstants(N))
7780     return V;
7781 
7782   // fold (select C, 0, X) -> (and (not C), X)
7783   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7784     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7785     AddToWorklist(NOTNode.getNode());
7786     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7787   }
7788   // fold (select C, X, 1) -> (or (not C), X)
7789   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7790     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7791     AddToWorklist(NOTNode.getNode());
7792     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7793   }
7794   // fold (select X, Y, X) -> (and X, Y)
7795   // fold (select X, Y, 0) -> (and X, Y)
7796   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7797     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7798 
7799   // If we can fold this based on the true/false value, do so.
7800   if (SimplifySelectOps(N, N1, N2))
7801     return SDValue(N, 0); // Don't revisit N.
7802 
7803   if (VT0 == MVT::i1) {
7804     // The code in this block deals with the following 2 equivalences:
7805     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7806     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
7807     // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if the inner select already exists in the DAG,
    // and we always transform to the left-hand form if we know that we can
    // further optimize the combination of the conditions.
7812     bool normalizeToSequence =
7813         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7814     // select (and Cond0, Cond1), X, Y
7815     //   -> select Cond0, (select Cond1, X, Y), Y
7816     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7817       SDValue Cond0 = N0->getOperand(0);
7818       SDValue Cond1 = N0->getOperand(1);
7819       SDValue InnerSelect =
7820           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7821       if (normalizeToSequence || !InnerSelect.use_empty())
7822         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7823                            InnerSelect, N2);
7824       // Cleanup on failure.
7825       if (InnerSelect.use_empty())
7826         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7827     }
7828     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7829     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7830       SDValue Cond0 = N0->getOperand(0);
7831       SDValue Cond1 = N0->getOperand(1);
7832       SDValue InnerSelect =
7833           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7834       if (normalizeToSequence || !InnerSelect.use_empty())
7835         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7836                            InnerSelect);
7837       // Cleanup on failure.
7838       if (InnerSelect.use_empty())
7839         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7840     }
7841 
7842     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7843     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7844       SDValue N1_0 = N1->getOperand(0);
7845       SDValue N1_1 = N1->getOperand(1);
7846       SDValue N1_2 = N1->getOperand(2);
7847       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7848         // Create the actual and node if we can generate good code for it.
7849         if (!normalizeToSequence) {
7850           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7851           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7852         }
7853         // Otherwise see if we can optimize the "and" to a better pattern.
7854         if (SDValue Combined = visitANDLike(N0, N1_0, N))
7855           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7856                              N2);
7857       }
7858     }
7859     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7860     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7861       SDValue N2_0 = N2->getOperand(0);
7862       SDValue N2_1 = N2->getOperand(1);
7863       SDValue N2_2 = N2->getOperand(2);
7864       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7865         // Create the actual or node if we can generate good code for it.
7866         if (!normalizeToSequence) {
7867           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7868           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7869         }
7870         // Otherwise see if we can optimize to a better pattern.
7871         if (SDValue Combined = visitORLike(N0, N2_0, N))
7872           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7873                              N2_2);
7874       }
7875     }
7876   }
7877 
7878   // select (not Cond), N1, N2 -> select Cond, N2, N1
7879   if (SDValue F = extractBooleanFlip(N0, TLI))
7880     return DAG.getSelect(DL, VT, F, N2, N1);
7881 
7882   // Fold selects based on a setcc into other things, such as min/max/abs.
7883   if (N0.getOpcode() == ISD::SETCC) {
7884     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7885     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7886 
7887     // select (fcmp lt x, y), x, y -> fminnum x, y
7888     // select (fcmp gt x, y), x, y -> fmaxnum x, y
7889     //
7890     // This is OK if we don't care what happens if either operand is a NaN.
7891     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
7892       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7893                                                 CC, TLI, DAG))
7894         return FMinMax;
7895 
7896     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7897     // This is conservatively limited to pre-legal-operations to give targets
7898     // a chance to reverse the transform if they want to do that. Also, it is
7899     // unlikely that the pattern would be formed late, so it's probably not
7900     // worth going through the other checks.
7901     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7902         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7903         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7904       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7905       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7906       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7907         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7908         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7909         //
7910         // The IR equivalent of this transform would have this form:
7911         //   %a = add %x, C
7912         //   %c = icmp ugt %x, ~C
7913         //   %r = select %c, -1, %a
7914         //   =>
7915         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7916         //   %u0 = extractvalue %u, 0
7917         //   %u1 = extractvalue %u, 1
7918         //   %r = select %u1, -1, %u0
7919         SDVTList VTs = DAG.getVTList(VT, VT0);
7920         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7921         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7922       }
7923     }
7924 
7925     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7926         (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7927       return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7928                          N0.getOperand(2));
7929 
7930     return SimplifySelect(DL, N0, N1, N2);
7931   }
7932 
7933   return SDValue();
7934 }
7935 
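/// Split a vector compare-like node (a result plus two vector operands and a
/// condition code) into Lo/Hi halves, e.g. (v8i1 setcc v8i32, v8i32, cc)
/// into two (v4i1 setcc v4i32, v4i32, cc) nodes.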
7936 static
7937 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7938   SDLoc DL(N);
7939   EVT LoVT, HiVT;
7940   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7941 
7942   // Split the inputs.
7943   SDValue Lo, Hi, LL, LH, RL, RH;
7944   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7945   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7946 
7947   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7948   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7949 
7950   return std::make_pair(Lo, Hi);
7951 }
7952 
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BUILD_VECTOR of ConstantSDNodes (or
// undefs).
7955 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7956   SDLoc DL(N);
7957   SDValue Cond = N->getOperand(0);
7958   SDValue LHS = N->getOperand(1);
7959   SDValue RHS = N->getOperand(2);
7960   EVT VT = N->getValueType(0);
7961   int NumElems = VT.getVectorNumElements();
7962   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7963          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7964          Cond.getOpcode() == ISD::BUILD_VECTOR);
7965 
  // CONCAT_VECTORS can take an arbitrary number of operands. We only care
  // about binary ones here.
7968   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7969     return SDValue();
7970 
  // We're sure we have an even number of elements since the operands to the
  // vselect are concat_vectors nodes.
  // Walk the first half of the BUILD_VECTOR, skipping UNDEF elements, and
  // check that all the non-undef elements are the same constant node.
7976   ConstantSDNode *BottomHalf = nullptr;
7977   for (int i = 0; i < NumElems / 2; ++i) {
7978     if (Cond->getOperand(i)->isUndef())
7979       continue;
7980 
7981     if (BottomHalf == nullptr)
7982       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7983     else if (Cond->getOperand(i).getNode() != BottomHalf)
7984       return SDValue();
7985   }
7986 
7987   // Do the same for the second half of the BuildVector
7988   ConstantSDNode *TopHalf = nullptr;
7989   for (int i = NumElems / 2; i < NumElems; ++i) {
7990     if (Cond->getOperand(i)->isUndef())
7991       continue;
7992 
7993     if (TopHalf == nullptr)
7994       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7995     else if (Cond->getOperand(i).getNode() != TopHalf)
7996       return SDValue();
7997   }
7998 
7999   assert(TopHalf && BottomHalf &&
8000          "One half of the selector was all UNDEFs and the other was all the "
8001          "same value. This should have been addressed before this function.");
8002   return DAG.getNode(
8003       ISD::CONCAT_VECTORS, DL, VT,
8004       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8005       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8006 }
8007 
8008 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8009   if (Level >= AfterLegalizeTypes)
8010     return SDValue();
8011 
8012   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8013   SDValue Mask = MSC->getMask();
8014   SDValue Data  = MSC->getValue();
8015   SDLoc DL(N);
8016 
8017   // If the MSCATTER data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
8019   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8020   // and enables future optimizations (e.g. min/max pattern matching on X86).
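  // For example (an illustrative sketch): a v16i32 scatter whose mask is a
  // (v16i1 setcc ...) on a target where v16i32 must be split becomes two
  // chained v8i32 scatters, each consuming one half of the split setcc mask.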
8021   if (Mask.getOpcode() != ISD::SETCC)
8022     return SDValue();
8023 
8024   // Check if any splitting is required.
8025   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
8026       TargetLowering::TypeSplitVector)
8027     return SDValue();
8028   SDValue MaskLo, MaskHi;
8029   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8030 
8031   EVT LoVT, HiVT;
8032   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
8033 
8034   SDValue Chain = MSC->getChain();
8035 
8036   EVT MemoryVT = MSC->getMemoryVT();
8037   unsigned Alignment = MSC->getOriginalAlignment();
8038 
8039   EVT LoMemVT, HiMemVT;
8040   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8041 
8042   SDValue DataLo, DataHi;
8043   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8044 
8045   SDValue Scale = MSC->getScale();
8046   SDValue BasePtr = MSC->getBasePtr();
8047   SDValue IndexLo, IndexHi;
8048   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
8049 
8050   MachineMemOperand *MMO = DAG.getMachineFunction().
8051     getMachineMemOperand(MSC->getPointerInfo(),
8052                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8053                           Alignment, MSC->getAAInfo(), MSC->getRanges());
8054 
8055   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
8056   SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
8057                                     DataLo.getValueType(), DL, OpsLo, MMO);
8058 
  // The order of the scatter operations after the split is well defined: the
  // "Hi" part comes after the "Lo" part, so the two operations are chained
  // one after the other (the Hi scatter takes Lo as its chain operand).
8062   SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
8063   return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
8064                               DL, OpsHi, MMO);
8065 }
8066 
8067 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8068   if (Level >= AfterLegalizeTypes)
8069     return SDValue();
8070 
8071   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8072   SDValue Mask = MST->getMask();
8073   SDValue Data  = MST->getValue();
8074   EVT VT = Data.getValueType();
8075   SDLoc DL(N);
8076 
8077   // If the MSTORE data type requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
8079   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8080   // and enables future optimizations (e.g. min/max pattern matching on X86).
8081   if (Mask.getOpcode() == ISD::SETCC) {
8082     // Check if any splitting is required.
8083     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8084         TargetLowering::TypeSplitVector)
8085       return SDValue();
8086 
8087     SDValue MaskLo, MaskHi, Lo, Hi;
8088     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8089 
8090     SDValue Chain = MST->getChain();
8091     SDValue Ptr   = MST->getBasePtr();
8092 
8093     EVT MemoryVT = MST->getMemoryVT();
8094     unsigned Alignment = MST->getOriginalAlignment();
8095 
    // If the alignment is equal to the vector size, use half of it for the
    // second half.
8098     unsigned SecondHalfAlignment =
8099       (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
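    // For example (illustrative): a 32-byte-aligned v8i32 store has
    // Alignment == VT.getSizeInBits() / 8 == 32, and the high half starts
    // LoMemVT.getStoreSize() == 16 bytes in, so it can only be assumed to be
    // 16-byte aligned.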
8100 
8101     EVT LoMemVT, HiMemVT;
8102     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8103 
8104     SDValue DataLo, DataHi;
8105     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8106 
8107     MachineMemOperand *MMO = DAG.getMachineFunction().
8108       getMachineMemOperand(MST->getPointerInfo(),
8109                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
8110                            Alignment, MST->getAAInfo(), MST->getRanges());
8111 
8112     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8113                             MST->isTruncatingStore(),
8114                             MST->isCompressingStore());
8115 
8116     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8117                                      MST->isCompressingStore());
8118     unsigned HiOffset = LoMemVT.getStoreSize();
8119 
8120     MMO = DAG.getMachineFunction().getMachineMemOperand(
8121         MST->getPointerInfo().getWithOffset(HiOffset),
8122         MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
8123         MST->getAAInfo(), MST->getRanges());
8124 
8125     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8126                             MST->isTruncatingStore(),
8127                             MST->isCompressingStore());
8128 
8129     AddToWorklist(Lo.getNode());
8130     AddToWorklist(Hi.getNode());
8131 
8132     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8133   }
8134   return SDValue();
8135 }
8136 
8137 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8138   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8139   SDValue Mask = MGT->getMask();
8140   SDLoc DL(N);
8141 
8142   // Zap gathers with a zero mask.
8143   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8144     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8145 
8146   if (Level >= AfterLegalizeTypes)
8147     return SDValue();
8148 
8149   // If the MGATHER result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
8151   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8152   // and enables future optimizations (e.g. min/max pattern matching on X86).
8153 
8154   if (Mask.getOpcode() != ISD::SETCC)
8155     return SDValue();
8156 
8157   EVT VT = N->getValueType(0);
8158 
8159   // Check if any splitting is required.
8160   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8161       TargetLowering::TypeSplitVector)
8162     return SDValue();
8163 
8164   SDValue MaskLo, MaskHi, Lo, Hi;
8165   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8166 
8167   SDValue PassThru = MGT->getPassThru();
8168   SDValue PassThruLo, PassThruHi;
8169   std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8170 
8171   EVT LoVT, HiVT;
8172   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8173 
8174   SDValue Chain = MGT->getChain();
8175   EVT MemoryVT = MGT->getMemoryVT();
8176   unsigned Alignment = MGT->getOriginalAlignment();
8177 
8178   EVT LoMemVT, HiMemVT;
8179   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8180 
8181   SDValue Scale = MGT->getScale();
8182   SDValue BasePtr = MGT->getBasePtr();
8183   SDValue Index = MGT->getIndex();
8184   SDValue IndexLo, IndexHi;
8185   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8186 
8187   MachineMemOperand *MMO = DAG.getMachineFunction().
8188     getMachineMemOperand(MGT->getPointerInfo(),
8189                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8190                           Alignment, MGT->getAAInfo(), MGT->getRanges());
8191 
8192   SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8193   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8194                            MMO);
8195 
8196   SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8197   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8198                            MMO);
8199 
8200   AddToWorklist(Lo.getNode());
8201   AddToWorklist(Hi.getNode());
8202 
8203   // Build a factor node to remember that this load is independent of the
8204   // other one.
8205   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8206                       Hi.getValue(1));
8207 
  // We legalized the chain result; switch anything that used the old chain
  // to use the new one.
8210   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8211 
8212   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8213 
8214   SDValue RetOps[] = { GatherRes, Chain };
8215   return DAG.getMergeValues(RetOps, DL);
8216 }
8217 
8218 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8219   if (Level >= AfterLegalizeTypes)
8220     return SDValue();
8221 
8222   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8223   SDValue Mask = MLD->getMask();
8224   SDLoc DL(N);
8225 
8226   // If the MLOAD result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and their operands before legalization. This
8228   // prevents the type legalizer from unrolling SETCC into scalar comparisons
8229   // and enables future optimizations (e.g. min/max pattern matching on X86).
8230   if (Mask.getOpcode() == ISD::SETCC) {
8231     EVT VT = N->getValueType(0);
8232 
8233     // Check if any splitting is required.
8234     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8235         TargetLowering::TypeSplitVector)
8236       return SDValue();
8237 
8238     SDValue MaskLo, MaskHi, Lo, Hi;
8239     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8240 
8241     SDValue PassThru = MLD->getPassThru();
8242     SDValue PassThruLo, PassThruHi;
8243     std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8244 
8245     EVT LoVT, HiVT;
8246     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8247 
8248     SDValue Chain = MLD->getChain();
8249     SDValue Ptr   = MLD->getBasePtr();
8250     EVT MemoryVT = MLD->getMemoryVT();
8251     unsigned Alignment = MLD->getOriginalAlignment();
8252 
    // If the alignment is equal to the vector size, use half of it for the
    // second half.
8255     unsigned SecondHalfAlignment =
8256       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
8257          Alignment/2 : Alignment;
8258 
8259     EVT LoMemVT, HiMemVT;
8260     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8261 
8262     MachineMemOperand *MMO = DAG.getMachineFunction().
8263     getMachineMemOperand(MLD->getPointerInfo(),
8264                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
8265                          Alignment, MLD->getAAInfo(), MLD->getRanges());
8266 
8267     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8268                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8269 
8270     Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8271                                      MLD->isExpandingLoad());
8272     unsigned HiOffset = LoMemVT.getStoreSize();
8273 
8274     MMO = DAG.getMachineFunction().getMachineMemOperand(
8275         MLD->getPointerInfo().getWithOffset(HiOffset),
8276         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
8277         MLD->getAAInfo(), MLD->getRanges());
8278 
8279     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8280                            MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8281 
8282     AddToWorklist(Lo.getNode());
8283     AddToWorklist(Hi.getNode());
8284 
8285     // Build a factor node to remember that this load is independent of the
8286     // other one.
8287     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8288                         Hi.getValue(1));
8289 
    // We legalized the chain result; switch anything that used the old chain
    // to use the new one.
8292     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8293 
8294     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8295 
8296     SDValue RetOps[] = { LoadRes, Chain };
8297     return DAG.getMergeValues(RetOps, DL);
8298   }
8299   return SDValue();
8300 }
8301 
8302 /// A vector select of 2 constant vectors can be simplified to math/logic to
8303 /// avoid a variable select instruction and possibly avoid constant loads.
8304 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8305   SDValue Cond = N->getOperand(0);
8306   SDValue N1 = N->getOperand(1);
8307   SDValue N2 = N->getOperand(2);
8308   EVT VT = N->getValueType(0);
8309   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8310       !TLI.convertSelectOfConstantsToMath(VT) ||
8311       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8312       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8313     return SDValue();
8314 
8315   // Check if we can use the condition value to increment/decrement a single
8316   // constant value. This simplifies a select to an add and removes a constant
8317   // load/materialization from the general case.
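  // For example (an illustrative sketch):
  //   vselect <4 x i1> %c, <4 x i32> <5,5,5,5>, <4 x i32> <4,4,4,4>
  //     --> add (zext <4 x i1> %c to <4 x i32>), <4,4,4,4>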
8318   bool AllAddOne = true;
8319   bool AllSubOne = true;
8320   unsigned Elts = VT.getVectorNumElements();
8321   for (unsigned i = 0; i != Elts; ++i) {
8322     SDValue N1Elt = N1.getOperand(i);
8323     SDValue N2Elt = N2.getOperand(i);
8324     if (N1Elt.isUndef() || N2Elt.isUndef())
8325       continue;
8326 
8327     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8328     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8329     if (C1 != C2 + 1)
8330       AllAddOne = false;
8331     if (C1 != C2 - 1)
8332       AllSubOne = false;
8333   }
8334 
8335   // Further simplifications for the extra-special cases where the constants are
8336   // all 0 or all -1 should be implemented as folds of these patterns.
8337   SDLoc DL(N);
8338   if (AllAddOne || AllSubOne) {
8339     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8340     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8341     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8342     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8343     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8344   }
8345 
8346   // The general case for select-of-constants:
8347   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8348   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8349   // leave that to a machine-specific pass.
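  // (When Cond is true, sext Cond is all-ones, so the 'and' yields C1^C2 and
  // the 'xor' produces C1; when Cond is false, the 'and' yields 0 and the
  // 'xor' produces C2.)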
8350   return SDValue();
8351 }
8352 
8353 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8354   SDValue N0 = N->getOperand(0);
8355   SDValue N1 = N->getOperand(1);
8356   SDValue N2 = N->getOperand(2);
8357   EVT VT = N->getValueType(0);
8358   SDLoc DL(N);
8359 
8360   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8361     return V;
8362 
8363   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8364   if (SDValue F = extractBooleanFlip(N0, TLI))
8365     return DAG.getSelect(DL, VT, F, N2, N1);
8366 
8367   // Canonicalize integer abs.
8368   // vselect (setg[te] X,  0),  X, -X ->
8369   // vselect (setgt    X, -1),  X, -X ->
8370   // vselect (setl[te] X,  0), -X,  X ->
8371   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
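  // For a concrete i32 instance: Y = sra X, 31 is all-ones exactly when X is
  // negative, so xor (add X, Y), Y computes ~(X - 1) = -X for negative X and
  // leaves non-negative X unchanged.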
8372   if (N0.getOpcode() == ISD::SETCC) {
8373     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8374     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8375     bool isAbs = false;
8376     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8377 
8378     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8379          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8380         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8381       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8382     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8383              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8384       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8385 
8386     if (isAbs) {
8387       EVT VT = LHS.getValueType();
8388       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8389         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8390 
8391       SDValue Shift = DAG.getNode(
8392           ISD::SRA, DL, VT, LHS,
8393           DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8394       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8395       AddToWorklist(Shift.getNode());
8396       AddToWorklist(Add.getNode());
8397       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8398     }
8399 
    // vselect (fcmp lt x, y), x, y -> fminnum x, y
    // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
8406     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8407                                                        N0.getOperand(1), TLI)) {
8408       if (SDValue FMinMax = combineMinNumMaxNum(
8409               DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8410         return FMinMax;
8411     }
8412 
8413     // If this select has a condition (setcc) with narrower operands than the
8414     // select, try to widen the compare to match the select width.
8415     // TODO: This should be extended to handle any constant.
8416     // TODO: This could be extended to handle non-loading patterns, but that
8417     //       requires thorough testing to avoid regressions.
8418     if (isNullOrNullSplat(RHS)) {
8419       EVT NarrowVT = LHS.getValueType();
8420       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8421       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8422       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8423       unsigned WideWidth = WideVT.getScalarSizeInBits();
8424       bool IsSigned = isSignedIntSetCC(CC);
8425       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8426       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8427           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8428           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8429           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8430         // Both compare operands can be widened for free. The LHS can use an
8431         // extended load, and the RHS is a constant:
8432         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8433         //   vselect (setcc extload(X), C'), N1, N2
8434         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8435         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8436         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8437         EVT WideSetCCVT = getSetCCResultType(WideVT);
8438         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8439         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8440       }
8441     }
8442   }
8443 
8444   if (SimplifySelectOps(N, N1, N2))
8445     return SDValue(N, 0);  // Don't revisit N.
8446 
8447   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8448   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8449     return N1;
8450   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8451   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8452     return N2;
8453 
  // ConvertSelectToConcatVector assumes both of the above folds for
  // (vselect (build_vector all{ones,zeros}), ...) have already been made
  // and addressed.
8457   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8458       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8459       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8460     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8461       return CV;
8462   }
8463 
8464   if (SDValue V = foldVSelectOfConstants(N))
8465     return V;
8466 
8467   return SDValue();
8468 }
8469 
8470 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8471   SDValue N0 = N->getOperand(0);
8472   SDValue N1 = N->getOperand(1);
8473   SDValue N2 = N->getOperand(2);
8474   SDValue N3 = N->getOperand(3);
8475   SDValue N4 = N->getOperand(4);
8476   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8477 
8478   // fold select_cc lhs, rhs, x, x, cc -> x
8479   if (N2 == N3)
8480     return N2;
8481 
  // Determine if the condition we're dealing with is constant.
8483   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8484                                   CC, SDLoc(N), false)) {
8485     AddToWorklist(SCC.getNode());
8486 
8487     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8488       if (!SCCC->isNullValue())
8489         return N2;    // cond always true -> true val
8490       else
8491         return N3;    // cond always false -> false val
8492     } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // consistent with DAG creation: no setcc node is created in this case.
8495       return N2;
8496     } else if (SCC.getOpcode() == ISD::SETCC) {
8497       // Fold to a simpler select_cc
8498       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8499                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8500                          SCC.getOperand(2));
8501     }
8502   }
8503 
8504   // If we can fold this based on the true/false value, do so.
8505   if (SimplifySelectOps(N, N2, N3))
8506     return SDValue(N, 0);  // Don't revisit N.
8507 
8508   // fold select_cc into other things, such as min/max/abs
8509   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8510 }
8511 
8512 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc for as long as possible.
8516   bool PreferSetCC =
8517       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8518 
8519   SDValue Combined = SimplifySetCC(
8520       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8521       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8522 
8523   if (!Combined)
8524     return SDValue();
8525 
  // If we prefer to have a setcc and the combined node is not one, try our
  // best to recreate it using rebuildSetCC.
8528   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8529     SDValue NewSetCC = rebuildSetCC(Combined);
8530 
8531     // We don't have anything interesting to combine to.
8532     if (NewSetCC.getNode() == N)
8533       return SDValue();
8534 
8535     if (NewSetCC)
8536       return NewSetCC;
8537   }
8538 
8539   return Combined;
8540 }
8541 
8542 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8543   SDValue LHS = N->getOperand(0);
8544   SDValue RHS = N->getOperand(1);
8545   SDValue Carry = N->getOperand(2);
8546   SDValue Cond = N->getOperand(3);
8547 
8548   // If Carry is false, fold to a regular SETCC.
8549   if (isNullConstant(Carry))
8550     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8551 
8552   return SDValue();
8553 }
8554 
8555 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8556 /// a build_vector of constants.
8557 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8558 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8559 /// Vector extends are not folded if operations are legal; this is to
8560 /// avoid introducing illegal build_vector dag nodes.
8561 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8562                                          SelectionDAG &DAG, bool LegalTypes) {
8563   unsigned Opcode = N->getOpcode();
8564   SDValue N0 = N->getOperand(0);
8565   EVT VT = N->getValueType(0);
8566 
8567   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8568          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8569          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8570          && "Expected EXTEND dag node in input!");
8571 
8572   // fold (sext c1) -> c1
8573   // fold (zext c1) -> c1
8574   // fold (aext c1) -> c1
8575   if (isa<ConstantSDNode>(N0))
8576     return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8577 
8578   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8579   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8580   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8581   EVT SVT = VT.getScalarType();
8582   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8583       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8584     return SDValue();
8585 
8586   // We can fold this node into a build_vector.
8587   unsigned VTBits = SVT.getSizeInBits();
8588   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8589   SmallVector<SDValue, 8> Elts;
8590   unsigned NumElts = VT.getVectorNumElements();
8591   SDLoc DL(N);
8592 
  // For zero-extensions, the result must still have its upper bits cleared,
  // so UNDEF elements are folded to zero rather than kept as UNDEF.
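  // For example, (zext (v2i8 build_vector 7, undef) to v2i16) may be folded
  // to (v2i16 build_vector 7, 0): choosing 0 for the undef lane preserves
  // the guarantee that a zext result has zero upper bits.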
8595   bool IsZext =
8596       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8597 
8598   for (unsigned i = 0; i != NumElts; ++i) {
8599     SDValue Op = N0.getOperand(i);
8600     if (Op.isUndef()) {
8601       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8602       continue;
8603     }
8604 
8605     SDLoc DL(Op);
    // Get the constant value and, if needed, truncate it to the size of the
    // type; nodes like build_vector might have constants wider than the
    // scalar type.
8608     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8609     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8610       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8611     else
8612       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8613   }
8614 
8615   return DAG.getBuildVector(VT, DL, Elts);
8616 }
8617 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
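// For example (an illustrative sketch): if (i16 (load x)) feeds both the
// extend and a (setcc (load x), 42), the setcc itself can be extended to
// compare the extloaded value against the extended constant, so the narrow
// load dies and every use is served by the extload.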
8622 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8623                                     unsigned ExtOpc,
8624                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8625                                     const TargetLowering &TLI) {
8626   bool HasCopyToRegUses = false;
8627   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8628   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8629                             UE = N0.getNode()->use_end();
8630        UI != UE; ++UI) {
8631     SDNode *User = *UI;
8632     if (User == N)
8633       continue;
8634     if (UI.getUse().getResNo() != N0.getResNo())
8635       continue;
8636     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8637     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8638       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8639       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8640         // Sign bits will be lost after a zext.
8641         return false;
8642       bool Add = false;
8643       for (unsigned i = 0; i != 2; ++i) {
8644         SDValue UseOp = User->getOperand(i);
8645         if (UseOp == N0)
8646           continue;
8647         if (!isa<ConstantSDNode>(UseOp))
8648           return false;
8649         Add = true;
8650       }
8651       if (Add)
8652         ExtendNodes.push_back(User);
8653       continue;
8654     }
8655     // If truncates aren't free and there are users we can't
8656     // extend, it isn't worthwhile.
8657     if (!isTruncFree)
8658       return false;
8659     // Remember if this value is live-out.
8660     if (User->getOpcode() == ISD::CopyToReg)
8661       HasCopyToRegUses = true;
8662   }
8663 
8664   if (HasCopyToRegUses) {
8665     bool BothLiveOut = false;
8666     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8667          UI != UE; ++UI) {
8668       SDUse &Use = UI.getUse();
8669       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8670         BothLiveOut = true;
8671         break;
8672       }
8673     }
8674     if (BothLiveOut)
8675       // Both unextended and extended values are live out. There had better be
8676       // a good reason for the transformation.
      return !ExtendNodes.empty();
8678   }
8679   return true;
8680 }
8681 
8682 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8683                                   SDValue OrigLoad, SDValue ExtLoad,
8684                                   ISD::NodeType ExtType) {
8685   // Extend SetCC uses if necessary.
8686   SDLoc DL(ExtLoad);
8687   for (SDNode *SetCC : SetCCs) {
8688     SmallVector<SDValue, 4> Ops;
8689 
8690     for (unsigned j = 0; j != 2; ++j) {
8691       SDValue SOp = SetCC->getOperand(j);
8692       if (SOp == OrigLoad)
8693         Ops.push_back(ExtLoad);
8694       else
8695         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8696     }
8697 
8698     Ops.push_back(SetCC->getOperand(2));
8699     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8700   }
8701 }
8702 
8703 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8704 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8705   SDValue N0 = N->getOperand(0);
8706   EVT DstVT = N->getValueType(0);
8707   EVT SrcVT = N0.getValueType();
8708 
8709   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8710           N->getOpcode() == ISD::ZERO_EXTEND) &&
8711          "Unexpected node type (not an extend)!");
8712 
8713   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8714   // For example, on a target with legal v4i32, but illegal v8i32, turn:
8715   //   (v8i32 (sext (v8i16 (load x))))
8716   // into:
8717   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
8718   //                          (v4i32 (sextload (x + 16)))))
8719   // Where uses of the original load, i.e.:
8720   //   (v8i16 (load x))
8721   // are replaced with:
8722   //   (v8i16 (truncate
8723   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
8724   //                            (v4i32 (sextload (x + 16)))))))
8725   //
8726   // This combine is only applicable to illegal, but splittable, vectors.
8727   // All legal types, and illegal non-vector types, are handled elsewhere.
8728   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8729   //
8730   if (N0->getOpcode() != ISD::LOAD)
8731     return SDValue();
8732 
8733   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8734 
8735   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8736       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8737       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8738     return SDValue();
8739 
8740   SmallVector<SDNode *, 4> SetCCs;
8741   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8742     return SDValue();
8743 
8744   ISD::LoadExtType ExtType =
8745       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8746 
8747   // Try to split the vector types to get down to legal types.
8748   EVT SplitSrcVT = SrcVT;
8749   EVT SplitDstVT = DstVT;
8750   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8751          SplitSrcVT.getVectorNumElements() > 1) {
8752     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8753     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8754   }
8755 
8756   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8757     return SDValue();
8758 
8759   SDLoc DL(N);
8760   const unsigned NumSplits =
8761       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8762   const unsigned Stride = SplitSrcVT.getStoreSize();
8763   SmallVector<SDValue, 4> Loads;
8764   SmallVector<SDValue, 4> Chains;
8765 
8766   SDValue BasePtr = LN0->getBasePtr();
8767   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8768     const unsigned Offset = Idx * Stride;
8769     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8770 
8771     SDValue SplitLoad = DAG.getExtLoad(
8772         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8773         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8774         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8775 
8776     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8777                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8778 
8779     Loads.push_back(SplitLoad.getValue(0));
8780     Chains.push_back(SplitLoad.getValue(1));
8781   }
8782 
8783   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8784   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8785 
8786   // Simplify TF.
8787   AddToWorklist(NewChain.getNode());
8788 
8789   CombineTo(N, NewValue);
8790 
8791   // Replace uses of the original load (before extension)
8792   // with a truncate of the concatenated sextloaded vectors.
8793   SDValue Trunc =
8794       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8795   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8796   CombineTo(N0.getNode(), Trunc, NewChain);
8797   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8798 }
8799 
8800 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8801 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
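// For example (an illustrative sketch):
//   (i32 zext (and (srl (i16 load x), 4), 0xff))
//     --> (and (srl (i32 zextload x), 4), 0xff)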
8802 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8803   assert(N->getOpcode() == ISD::ZERO_EXTEND);
8804   EVT VT = N->getValueType(0);
8805   EVT OrigVT = N->getOperand(0).getValueType();
8806   if (TLI.isZExtFree(OrigVT, VT))
8807     return SDValue();
8808 
8809   // and/or/xor
8810   SDValue N0 = N->getOperand(0);
8811   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8812         N0.getOpcode() == ISD::XOR) ||
8813       N0.getOperand(1).getOpcode() != ISD::Constant ||
8814       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8815     return SDValue();
8816 
8817   // shl/shr
8818   SDValue N1 = N0->getOperand(0);
8819   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8820       N1.getOperand(1).getOpcode() != ISD::Constant ||
8821       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8822     return SDValue();
8823 
8824   // load
8825   if (!isa<LoadSDNode>(N1.getOperand(0)))
8826     return SDValue();
8827   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8828   EVT MemVT = Load->getMemoryVT();
8829   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8830       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8831     return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong: a narrow SHL discards the bits shifted above the narrow
  // width, while the widened SHL keeps them, and only an AND mask makes the
  // two forms agree.
8836   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8837     return SDValue();
8838 
8839   if (!N0.hasOneUse() || !N1.hasOneUse())
8840     return SDValue();
8841 
8842   SmallVector<SDNode*, 4> SetCCs;
8843   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8844                                ISD::ZERO_EXTEND, SetCCs, TLI))
8845     return SDValue();
8846 
8847   // Actually do the transformation.
8848   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8849                                    Load->getChain(), Load->getBasePtr(),
8850                                    Load->getMemoryVT(), Load->getMemOperand());
8851 
8852   SDLoc DL1(N1);
8853   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8854                               N1.getOperand(1));
8855 
8856   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8857   Mask = Mask.zext(VT.getSizeInBits());
8858   SDLoc DL0(N0);
8859   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8860                             DAG.getConstant(Mask, DL0, VT));
8861 
8862   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8863   CombineTo(N, And);
8864   if (SDValue(Load, 0).hasOneUse()) {
8865     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8866   } else {
8867     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8868                                 Load->getValueType(0), ExtLoad);
8869     CombineTo(Load, Trunc, ExtLoad.getValue(1));
8870   }
8871 
8872   // N0 is dead at this point.
8873   recursivelyDeleteUnusedNodes(N0.getNode());
8874 
8875   return SDValue(N,0); // Return N so it doesn't get rechecked!
8876 }
8877 
8878 /// If we're narrowing or widening the result of a vector select and the final
8879 /// size is the same size as a setcc (compare) feeding the select, then try to
8880 /// apply the cast operation to the select's operands because matching vector
8881 /// sizes for a select condition and other operands should be more efficient.
8882 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8883   unsigned CastOpcode = Cast->getOpcode();
8884   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8885           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8886           CastOpcode == ISD::FP_ROUND) &&
8887          "Unexpected opcode for vector select narrowing/widening");
8888 
8889   // We only do this transform before legal ops because the pattern may be
8890   // obfuscated by target-specific operations after legalization. Do not create
8891   // an illegal select op, however, because that may be difficult to lower.
8892   EVT VT = Cast->getValueType(0);
8893   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8894     return SDValue();
8895 
8896   SDValue VSel = Cast->getOperand(0);
8897   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8898       VSel.getOperand(0).getOpcode() != ISD::SETCC)
8899     return SDValue();
8900 
8901   // Does the setcc have the same vector size as the casted select?
8902   SDValue SetCC = VSel.getOperand(0);
8903   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8904   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8905     return SDValue();
8906 
8907   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
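  // For example (illustrative, target-dependent): on a target whose v4i32
  // setcc produces a v4i32 mask,
  //   trunc (vselect (setcc v4i32 X, Y), v4i64 A, v4i64 B) to v4i32
  //     --> vselect (setcc X, Y), (trunc A), (trunc B)
  // so the mask and the selected vectors end up the same width.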
8908   SDValue A = VSel.getOperand(1);
8909   SDValue B = VSel.getOperand(2);
8910   SDValue CastA, CastB;
8911   SDLoc DL(Cast);
8912   if (CastOpcode == ISD::FP_ROUND) {
8913     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8914     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8915     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8916   } else {
8917     CastA = DAG.getNode(CastOpcode, DL, VT, A);
8918     CastB = DAG.getNode(CastOpcode, DL, VT, B);
8919   }
8920   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8921 }
8922 
8923 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8924 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8925 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8926                                      const TargetLowering &TLI, EVT VT,
8927                                      bool LegalOperations, SDNode *N,
8928                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
8929   SDNode *N0Node = N0.getNode();
8930   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8931                                                    : ISD::isZEXTLoad(N0Node);
8932   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8933       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8934     return SDValue();
8935 
8936   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8937   EVT MemVT = LN0->getMemoryVT();
8938   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8939       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8940     return SDValue();
8941 
8942   SDValue ExtLoad =
8943       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8944                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8945   Combiner.CombineTo(N, ExtLoad);
8946   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8947   if (LN0->use_empty())
8948     Combiner.recursivelyDeleteUnusedNodes(LN0);
8949   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8950 }
8951 
8952 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8953 // Only generate vector extloads when 1) they're legal, and 2) they are
8954 // deemed desirable by the target.
8955 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8956                                   const TargetLowering &TLI, EVT VT,
8957                                   bool LegalOperations, SDNode *N, SDValue N0,
8958                                   ISD::LoadExtType ExtLoadType,
8959                                   ISD::NodeType ExtOpc) {
8960   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8961       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8962       ((LegalOperations || VT.isVector() ||
8963         cast<LoadSDNode>(N0)->isVolatile()) &&
8964        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8965     return {};
8966 
8967   bool DoXform = true;
8968   SmallVector<SDNode *, 4> SetCCs;
8969   if (!N0.hasOneUse())
8970     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8971   if (VT.isVector())
8972     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8973   if (!DoXform)
8974     return {};
8975 
8976   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8977   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8978                                    LN0->getBasePtr(), N0.getValueType(),
8979                                    LN0->getMemOperand());
8980   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8981   // If the load value is used only by N, replace it via CombineTo N.
8982   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8983   Combiner.CombineTo(N, ExtLoad);
8984   if (NoReplaceTrunc) {
8985     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8986     Combiner.recursivelyDeleteUnusedNodes(LN0);
8987   } else {
8988     SDValue Trunc =
8989         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8990     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8991   }
8992   return SDValue(N, 0); // Return N so it doesn't get rechecked!
8993 }
8994 
8995 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8996                                        bool LegalOperations) {
8997   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8998           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8999 
9000   SDValue SetCC = N->getOperand(0);
9001   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9002       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9003     return SDValue();
9004 
9005   SDValue X = SetCC.getOperand(0);
9006   SDValue Ones = SetCC.getOperand(1);
9007   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9008   EVT VT = N->getValueType(0);
9009   EVT XVT = X.getValueType();
9010   // setge X, C is canonicalized to setgt, so we do not need to match that
9011   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9012   // not require the 'not' op.
9013   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9014     // Invert and smear/shift the sign bit:
9015     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9016     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
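    // For i32 (illustrative): zext i1 (setgt i32 X, -1) --> srl (not X), 31,
    // i.e. the inverted sign bit of X shifted down into bit 0.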
9017     SDLoc DL(N);
9018     SDValue NotX = DAG.getNOT(DL, X, VT);
9019     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9020     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9021     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9022   }
9023   return SDValue();
9024 }
9025 
9026 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9027   SDValue N0 = N->getOperand(0);
9028   EVT VT = N->getValueType(0);
9029   SDLoc DL(N);
9030 
9031   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9032     return Res;
9033 
9034   // fold (sext (sext x)) -> (sext x)
9035   // fold (sext (aext x)) -> (sext x)
9036   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9037     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9038 
9039   if (N0.getOpcode() == ISD::TRUNCATE) {
9040     // fold (sext (truncate (load x))) -> (sext (smaller load x))
9041     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9042     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9043       SDNode *oye = N0.getOperand(0).getNode();
9044       if (NarrowLoad.getNode() != N0.getNode()) {
9045         CombineTo(N0.getNode(), NarrowLoad);
9046         // CombineTo deleted the truncate, if needed, but not what's under it.
9047         AddToWorklist(oye);
9048       }
9049       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9050     }
9051 
9052     // See if the value being truncated is already sign extended.  If so, just
9053     // eliminate the trunc/sext pair.
9054     SDValue Op = N0.getOperand(0);
9055     unsigned OpBits   = Op.getScalarValueSizeInBits();
9056     unsigned MidBits  = N0.getScalarValueSizeInBits();
9057     unsigned DestBits = VT.getScalarSizeInBits();
9058     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9059 
9060     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
      // bits, the truncate/sext pair is a no-op and Op can be used directly.
9063       if (NumSignBits > DestBits-MidBits)
9064         return Op;
9065     } else if (OpBits < DestBits) {
9066       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
9067       // bits, just sext from i32.
9068       if (NumSignBits > OpBits-MidBits)
9069         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9070     } else {
9071       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
9072       // bits, just truncate to i32.
9073       if (NumSignBits > OpBits-MidBits)
9074         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9075     }
9076 
9077     // fold (sext (truncate x)) -> (sextinreg x).
9078     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9079                                                  N0.getValueType())) {
9080       if (OpBits < DestBits)
9081         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9082       else if (OpBits > DestBits)
9083         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9084       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9085                          DAG.getValueType(N0.getValueType()));
9086     }
9087   }
9088 
9089   // Try to simplify (sext (load x)).
9090   if (SDValue foldedExt =
9091           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9092                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9093     return foldedExt;
9094 
9095   // fold (sext (load x)) to multiple smaller sextloads.
9096   // Only on illegal but splittable vectors.
9097   if (SDValue ExtLoad = CombineExtLoad(N))
9098     return ExtLoad;
9099 
9100   // Try to simplify (sext (sextload x)).
9101   if (SDValue foldedExt = tryToFoldExtOfExtload(
9102           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9103     return foldedExt;
9104 
9105   // fold (sext (and/or/xor (load x), cst)) ->
9106   //      (and/or/xor (sextload x), (sext cst))
9107   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9108        N0.getOpcode() == ISD::XOR) &&
9109       isa<LoadSDNode>(N0.getOperand(0)) &&
9110       N0.getOperand(1).getOpcode() == ISD::Constant &&
9111       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9112     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9113     EVT MemVT = LN00->getMemoryVT();
9114     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9115       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9116       SmallVector<SDNode*, 4> SetCCs;
9117       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9118                                              ISD::SIGN_EXTEND, SetCCs, TLI);
9119       if (DoXform) {
9120         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9121                                          LN00->getChain(), LN00->getBasePtr(),
9122                                          LN00->getMemoryVT(),
9123                                          LN00->getMemOperand());
9124         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9125         Mask = Mask.sext(VT.getSizeInBits());
9126         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9127                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9128         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9129         bool NoReplaceTruncAnd = !N0.hasOneUse();
9130         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9131         CombineTo(N, And);
9132         // If N0 has multiple uses, change other uses as well.
9133         if (NoReplaceTruncAnd) {
9134           SDValue TruncAnd =
9135               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9136           CombineTo(N0.getNode(), TruncAnd);
9137         }
9138         if (NoReplaceTrunc) {
9139           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9140         } else {
9141           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9142                                       LN00->getValueType(0), ExtLoad);
9143           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9144         }
9145         return SDValue(N,0); // Return N so it doesn't get rechecked!
9146       }
9147     }
9148   }
9149 
9150   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9151     return V;
9152 
9153   if (N0.getOpcode() == ISD::SETCC) {
9154     SDValue N00 = N0.getOperand(0);
9155     SDValue N01 = N0.getOperand(1);
9156     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9157     EVT N00VT = N0.getOperand(0).getValueType();
9158 
9159     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9160     // Only do this before legalize for now.
9161     if (VT.isVector() && !LegalOperations &&
9162         TLI.getBooleanContents(N00VT) ==
9163             TargetLowering::ZeroOrNegativeOneBooleanContent) {
9164       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9165       // of the same size as the compared operands. Only optimize sext(setcc())
9166       // if this is the case.
9167       EVT SVT = getSetCCResultType(N00VT);
9168 
9169       // If we already have the desired type, don't change it.
9170       if (SVT != N0.getValueType()) {
        // We know that the # elements of the result is the same as the
        // # elements of the compare (and the # elements of the compare
        // result, for that matter). Check to see that they are the same
        // size. If so, we know that the element size of the sext'd result
        // matches the element size of the compare operands.
9176         if (VT.getSizeInBits() == SVT.getSizeInBits())
9177           return DAG.getSetCC(DL, VT, N00, N01, CC);
9178 
9179         // If the desired elements are smaller or larger than the source
9180         // elements, we can use a matching integer vector type and then
9181         // truncate/sign extend.
9182         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9183         if (SVT == MatchingVecType) {
9184           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9185           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9186         }
9187       }
9188     }
9189 
9190     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9191     // Here, T can be 1 or -1, depending on the type of the setcc and
9192     // getBooleanContents().
9193     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9194 
9195     // To determine the "true" side of the select, we need to know the high bit
9196     // of the value returned by the setcc if it evaluates to true.
9197     // If the type of the setcc is i1, then the true case of the select is just
9198     // sext(i1 1), that is, -1.
9199     // If the type of the setcc is larger (say, i8) then the value of the high
9200     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9201     // of the appropriate width.
9202     SDValue ExtTrueVal = (SetCCWidth == 1)
9203                              ? DAG.getAllOnesConstant(DL, VT)
9204                              : DAG.getBoolConstant(true, DL, VT, N00VT);
9205     SDValue Zero = DAG.getConstant(0, DL, VT);
9206     if (SDValue SCC =
9207             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9208       return SCC;
9209 
9210     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9211       EVT SetCCVT = getSetCCResultType(N00VT);
9212       // Don't do this transform for i1 because there's a select transform
9213       // that would reverse it.
9214       // TODO: We should not do this transform at all without a target hook
9215       // because a sext is likely cheaper than a select?
9216       if (SetCCVT.getScalarSizeInBits() != 1 &&
9217           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9218         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9219         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9220       }
9221     }
9222   }
9223 
9224   // fold (sext x) -> (zext x) if the sign bit is known zero.
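  // For example, (sext (and X, 0x7f)) is equivalent to (zext (and X, 0x7f))
  // because the sign bit of the narrower value is known to be clear.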
9225   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9226       DAG.SignBitIsZero(N0))
9227     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9228 
9229   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9230     return NewVSel;
9231 
9232   // Eliminate this sign extend by doing a negation in the destination type:
9233   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9234   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9235       isNullOrNullSplat(N0.getOperand(0)) &&
9236       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9237       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9238     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9239     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9240   }
9241   // Eliminate this sign extend by doing a decrement in the destination type:
9242   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9243   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9244       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9245       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9246       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9247     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9248     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9249   }
9250 
9251   return SDValue();
9252 }
9253 
/// If N is a truncate of some other value, return true and record the value
/// being truncated in Op and which of Op's bits are zero/one in Known. This
/// function computes KnownBits to avoid a duplicated call to computeKnownBits
/// in the caller.
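/// In addition to a plain ISD::TRUNCATE, this also treats (setcc ne X, 0)
/// with an i1 result as a truncate of X when every bit of X other than bit 0
/// is known to be zero.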
9258 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9259                          KnownBits &Known) {
9260   if (N->getOpcode() == ISD::TRUNCATE) {
9261     Op = N->getOperand(0);
9262     Known = DAG.computeKnownBits(Op);
9263     return true;
9264   }
9265 
9266   if (N.getOpcode() != ISD::SETCC ||
9267       N.getValueType().getScalarType() != MVT::i1 ||
9268       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9269     return false;
9270 
9271   SDValue Op0 = N->getOperand(0);
9272   SDValue Op1 = N->getOperand(1);
9273   assert(Op0.getValueType() == Op1.getValueType());
9274 
9275   if (isNullOrNullSplat(Op0))
9276     Op = Op1;
9277   else if (isNullOrNullSplat(Op1))
9278     Op = Op0;
9279   else
9280     return false;
9281 
9282   Known = DAG.computeKnownBits(Op);
9283 
9284   return (Known.Zero | 1).isAllOnesValue();
9285 }
9286 
9287 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9288   SDValue N0 = N->getOperand(0);
9289   EVT VT = N->getValueType(0);
9290 
9291   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9292     return Res;
9293 
9294   // fold (zext (zext x)) -> (zext x)
9295   // fold (zext (aext x)) -> (zext x)
9296   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9297     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9298                        N0.getOperand(0));
9299 
9300   // fold (zext (truncate x)) -> (zext x) or
9301   //      (zext (truncate x)) -> (truncate x)
9302   // This is valid when the truncated bits of x are already zero.
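  // For example, (zext (trunc (zext X:i8 to i32) to i16) to i32) becomes
  // (zext X:i8 to i32): the bits removed by the truncate were already zero.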
9303   SDValue Op;
9304   KnownBits Known;
9305   if (isTruncateOf(DAG, N0, Op, Known)) {
9306     APInt TruncatedBits =
9307       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9308       APInt(Op.getScalarValueSizeInBits(), 0) :
9309       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9310                         N0.getScalarValueSizeInBits(),
9311                         std::min(Op.getScalarValueSizeInBits(),
9312                                  VT.getScalarSizeInBits()));
9313     if (TruncatedBits.isSubsetOf(Known.Zero))
9314       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9315   }
9316 
9317   // fold (zext (truncate x)) -> (and x, mask)
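  // For example, (zext (trunc X:i32 to i8) to i32) becomes (and X, 255),
  // avoiding the round trip through the narrower type.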
9318   if (N0.getOpcode() == ISD::TRUNCATE) {
9319     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9320     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9321     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9322       SDNode *oye = N0.getOperand(0).getNode();
9323       if (NarrowLoad.getNode() != N0.getNode()) {
9324         CombineTo(N0.getNode(), NarrowLoad);
9325         // CombineTo deleted the truncate, if needed, but not what's under it.
9326         AddToWorklist(oye);
9327       }
9328       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9329     }
9330 
9331     EVT SrcVT = N0.getOperand(0).getValueType();
9332     EVT MinVT = N0.getValueType();
9333 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
9336     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9337       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9338                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9339         SDValue Op = N0.getOperand(0);
9340         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9341         AddToWorklist(Op.getNode());
9342         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9343         // Transfer the debug info; the new node is equivalent to N0.
9344         DAG.transferDbgValues(N0, ZExtOrTrunc);
9345         return ZExtOrTrunc;
9346       }
9347     }
9348 
9349     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9350       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9351       AddToWorklist(Op.getNode());
9352       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9353       // We may safely transfer the debug info describing the truncate node over
9354       // to the equivalent and operation.
9355       DAG.transferDbgValues(N0, And);
9356       return And;
9357     }
9358   }
9359 
9360   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9361   // if either of the casts is not free.
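  // For example, (zext (and (trunc X:i64 to i32), 7) to i64) can become
  // (and X, 7) directly when the truncate or the zext is not free.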
9362   if (N0.getOpcode() == ISD::AND &&
9363       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9364       N0.getOperand(1).getOpcode() == ISD::Constant &&
9365       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9366                            N0.getValueType()) ||
9367        !TLI.isZExtFree(N0.getValueType(), VT))) {
9368     SDValue X = N0.getOperand(0).getOperand(0);
9369     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9370     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9371     Mask = Mask.zext(VT.getSizeInBits());
9372     SDLoc DL(N);
9373     return DAG.getNode(ISD::AND, DL, VT,
9374                        X, DAG.getConstant(Mask, DL, VT));
9375   }
9376 
9377   // Try to simplify (zext (load x)).
9378   if (SDValue foldedExt =
9379           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9380                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9381     return foldedExt;
9382 
9383   // fold (zext (load x)) to multiple smaller zextloads.
9384   // Only on illegal but splittable vectors.
9385   if (SDValue ExtLoad = CombineExtLoad(N))
9386     return ExtLoad;
9387 
9388   // fold (zext (and/or/xor (load x), cst)) ->
9389   //      (and/or/xor (zextload x), (zext cst))
9390   // Unless (and (load x) cst) will match as a zextload already and has
9391   // additional users.
9392   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9393        N0.getOpcode() == ISD::XOR) &&
9394       isa<LoadSDNode>(N0.getOperand(0)) &&
9395       N0.getOperand(1).getOpcode() == ISD::Constant &&
9396       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9397     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9398     EVT MemVT = LN00->getMemoryVT();
9399     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9400         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9401       bool DoXform = true;
9402       SmallVector<SDNode*, 4> SetCCs;
9403       if (!N0.hasOneUse()) {
9404         if (N0.getOpcode() == ISD::AND) {
9405           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9406           EVT LoadResultTy = AndC->getValueType(0);
9407           EVT ExtVT;
9408           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9409             DoXform = false;
9410         }
9411       }
9412       if (DoXform)
9413         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9414                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9415       if (DoXform) {
9416         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9417                                          LN00->getChain(), LN00->getBasePtr(),
9418                                          LN00->getMemoryVT(),
9419                                          LN00->getMemOperand());
9420         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9421         Mask = Mask.zext(VT.getSizeInBits());
9422         SDLoc DL(N);
9423         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9424                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9425         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9426         bool NoReplaceTruncAnd = !N0.hasOneUse();
9427         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9428         CombineTo(N, And);
9429         // If N0 has multiple uses, change other uses as well.
9430         if (NoReplaceTruncAnd) {
9431           SDValue TruncAnd =
9432               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9433           CombineTo(N0.getNode(), TruncAnd);
9434         }
9435         if (NoReplaceTrunc) {
9436           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9437         } else {
9438           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9439                                       LN00->getValueType(0), ExtLoad);
9440           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9441         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
9443       }
9444     }
9445   }
9446 
9447   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9448   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9449   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9450     return ZExtLoad;
9451 
9452   // Try to simplify (zext (zextload x)).
9453   if (SDValue foldedExt = tryToFoldExtOfExtload(
9454           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9455     return foldedExt;
9456 
9457   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9458     return V;
9459 
9460   if (N0.getOpcode() == ISD::SETCC) {
9461     // Only do this before legalize for now.
9462     if (!LegalOperations && VT.isVector() &&
9463         N0.getValueType().getVectorElementType() == MVT::i1) {
9464       EVT N00VT = N0.getOperand(0).getValueType();
9465       if (getSetCCResultType(N00VT) == N0.getValueType())
9466         return SDValue();
9467 
      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the zext'd result matches the element size of
      // the compare operands.
9473       SDLoc DL(N);
9474       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9475       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
9477         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9478                                      N0.getOperand(1), N0.getOperand(2));
9479         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9480       }
9481 
9482       // If the desired elements are smaller or larger than the source
9483       // elements we can use a matching integer vector type and then
9484       // truncate/sign extend.
9485       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9486       SDValue VsetCC =
9487           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9488                       N0.getOperand(1), N0.getOperand(2));
9489       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9490                          VecOnes);
9491     }
9492 
9493     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9494     SDLoc DL(N);
9495     if (SDValue SCC = SimplifySelectCC(
9496             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9497             DAG.getConstant(0, DL, VT),
9498             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9499       return SCC;
9500   }
9501 
  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
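  // For shl this is safe only when the inner zext provides at least as many
  // known-zero high bits as the shift amount, e.g.
  // (zext (shl (zext X:i8 to i16), 3) to i32) -> (shl (zext X to i32), 3).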
9503   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9504       isa<ConstantSDNode>(N0.getOperand(1)) &&
9505       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9506       N0.hasOneUse()) {
9507     SDValue ShAmt = N0.getOperand(1);
9508     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9509     if (N0.getOpcode() == ISD::SHL) {
9510       SDValue InnerZExt = N0.getOperand(0);
9511       // If the original shl may be shifting out bits, do not perform this
9512       // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
                               InnerZExt.getOperand(0).getValueSizeInBits();
9515       if (ShAmtVal > KnownZeroBits)
9516         return SDValue();
9517     }
9518 
9519     SDLoc DL(N);
9520 
9521     // Ensure that the shift amount is wide enough for the shifted value.
9522     if (VT.getSizeInBits() >= 256)
9523       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9524 
9525     return DAG.getNode(N0.getOpcode(), DL, VT,
9526                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9527                        ShAmt);
9528   }
9529 
9530   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9531     return NewVSel;
9532 
9533   return SDValue();
9534 }
9535 
9536 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9537   SDValue N0 = N->getOperand(0);
9538   EVT VT = N->getValueType(0);
9539 
9540   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9541     return Res;
9542 
9543   // fold (aext (aext x)) -> (aext x)
9544   // fold (aext (zext x)) -> (zext x)
9545   // fold (aext (sext x)) -> (sext x)
9546   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9547       N0.getOpcode() == ISD::ZERO_EXTEND ||
9548       N0.getOpcode() == ISD::SIGN_EXTEND)
9549     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9550 
9551   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9552   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9553   if (N0.getOpcode() == ISD::TRUNCATE) {
9554     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9555       SDNode *oye = N0.getOperand(0).getNode();
9556       if (NarrowLoad.getNode() != N0.getNode()) {
9557         CombineTo(N0.getNode(), NarrowLoad);
9558         // CombineTo deleted the truncate, if needed, but not what's under it.
9559         AddToWorklist(oye);
9560       }
9561       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9562     }
9563   }
9564 
9565   // fold (aext (truncate x))
9566   if (N0.getOpcode() == ISD::TRUNCATE)
9567     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9568 
9569   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9570   // if the trunc is not free.
9571   if (N0.getOpcode() == ISD::AND &&
9572       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9573       N0.getOperand(1).getOpcode() == ISD::Constant &&
9574       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9575                           N0.getValueType())) {
9576     SDLoc DL(N);
9577     SDValue X = N0.getOperand(0).getOperand(0);
9578     X = DAG.getAnyExtOrTrunc(X, DL, VT);
9579     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9580     Mask = Mask.zext(VT.getSizeInBits());
9581     return DAG.getNode(ISD::AND, DL, VT,
9582                        X, DAG.getConstant(Mask, DL, VT));
9583   }
9584 
9585   // fold (aext (load x)) -> (aext (truncate (extload x)))
9586   // None of the supported targets knows how to perform load and any_ext
9587   // on vectors in one instruction.  We only perform this transformation on
9588   // scalars.
9589   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9590       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9591       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9592     bool DoXform = true;
9593     SmallVector<SDNode*, 4> SetCCs;
9594     if (!N0.hasOneUse())
9595       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9596                                         TLI);
9597     if (DoXform) {
9598       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9599       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9600                                        LN0->getChain(),
9601                                        LN0->getBasePtr(), N0.getValueType(),
9602                                        LN0->getMemOperand());
9603       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9604       // If the load value is used only by N, replace it via CombineTo N.
9605       bool NoReplaceTrunc = N0.hasOneUse();
9606       CombineTo(N, ExtLoad);
9607       if (NoReplaceTrunc) {
9608         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9609         recursivelyDeleteUnusedNodes(LN0);
9610       } else {
9611         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9612                                     N0.getValueType(), ExtLoad);
9613         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9614       }
9615       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9616     }
9617   }
9618 
9619   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9620   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9621   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
9622   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9623       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9624     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9625     ISD::LoadExtType ExtType = LN0->getExtensionType();
9626     EVT MemVT = LN0->getMemoryVT();
9627     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9628       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9629                                        VT, LN0->getChain(), LN0->getBasePtr(),
9630                                        MemVT, LN0->getMemOperand());
9631       CombineTo(N, ExtLoad);
9632       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9633       recursivelyDeleteUnusedNodes(LN0);
9634       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9635     }
9636   }
9637 
9638   if (N0.getOpcode() == ISD::SETCC) {
9639     // For vectors:
9640     // aext(setcc) -> vsetcc
9641     // aext(setcc) -> truncate(vsetcc)
9642     // aext(setcc) -> aext(vsetcc)
9643     // Only do this before legalize for now.
9644     if (VT.isVector() && !LegalOperations) {
9645       EVT N00VT = N0.getOperand(0).getValueType();
9646       if (getSetCCResultType(N00VT) == N0.getValueType())
9647         return SDValue();
9648 
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the extended result matches the
      // element size of the compare operands.
9654       if (VT.getSizeInBits() == N00VT.getSizeInBits())
9655         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9656                              N0.getOperand(1),
9657                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
9658 
9659       // If the desired elements are smaller or larger than the source
9660       // elements we can use a matching integer vector type and then
9661       // truncate/any extend
9662       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9663       SDValue VsetCC =
9664         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9665                       N0.getOperand(1),
9666                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
9667       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9668     }
9669 
9670     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9671     SDLoc DL(N);
9672     if (SDValue SCC = SimplifySelectCC(
9673             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9674             DAG.getConstant(0, DL, VT),
9675             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9676       return SCC;
9677   }
9678 
9679   return SDValue();
9680 }
9681 
9682 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9683   unsigned Opcode = N->getOpcode();
9684   SDValue N0 = N->getOperand(0);
9685   SDValue N1 = N->getOperand(1);
9686   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9687 
9688   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9689   if (N0.getOpcode() == Opcode &&
9690       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9691     return N0;
9692 
9693   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9694       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by applying the smallest asserted type to the larger source value. This
    // eliminates the later assert:
9698     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9699     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9700     SDValue BigA = N0.getOperand(0);
9701     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9702     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9703            "Asserting zero/sign-extended bits to a type larger than the "
9704            "truncated destination does not provide information");
9705 
9706     SDLoc DL(N);
9707     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9708     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9709     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9710                                     BigA.getOperand(0), MinAssertVTVal);
9711     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9712   }
9713 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and iY is
  // smaller than iX, just move the AssertZext in front of the truncate and
  // drop the AssertSext.
9717   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9718       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9719       Opcode == ISD::AssertZext) {
9720     SDValue BigA = N0.getOperand(0);
9721     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9722     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9723            "Asserting zero/sign-extended bits to a type larger than the "
9724            "truncated destination does not provide information");
9725 
9726     if (AssertVT.bitsLT(BigA_AssertVT)) {
9727       SDLoc DL(N);
9728       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9729                                       BigA.getOperand(0), N1);
9730       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9731     }
9732   }
9733 
9734   return SDValue();
9735 }
9736 
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the number of bits in the
/// narrower type, transform it to a narrower load from address + N / (number
/// of bits in the new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
9743 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9744   unsigned Opc = N->getOpcode();
9745 
9746   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9747   SDValue N0 = N->getOperand(0);
9748   EVT VT = N->getValueType(0);
9749   EVT ExtVT = VT;
9750 
9751   // This transformation isn't valid for vector loads.
9752   if (VT.isVector())
9753     return SDValue();
9754 
9755   unsigned ShAmt = 0;
9756   bool HasShiftedOffset = false;
9757   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
9758   // extended to VT.
9759   if (Opc == ISD::SIGN_EXTEND_INREG) {
9760     ExtType = ISD::SEXTLOAD;
9761     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9762   } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
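    // For example, on a little-endian target, (srl (i64 load p), 48) can be
    // rewritten as a zero-extending i16 load from p+6.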
9766     ExtType = ISD::ZEXTLOAD;
9767     N0 = SDValue(N, 0);
9768 
9769     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9770     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9771     if (!N01 || !LN0)
9772       return SDValue();
9773 
9774     uint64_t ShiftAmt = N01->getZExtValue();
9775     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9776     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9777       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9778     else
9779       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9780                                 VT.getSizeInBits() - ShiftAmt);
9781   } else if (Opc == ISD::AND) {
9782     // An AND with a constant mask is the same as a truncate + zero-extend.
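    // For example, (and (i32 load p), 0xffff) is equivalent to a
    // zero-extending i16 load, and a shifted mask such as 0xff00 selects a
    // byte loaded at an offset, which is shifted back into place further
    // below.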
9783     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9784     if (!AndC)
9785       return SDValue();
9786 
9787     const APInt &Mask = AndC->getAPIntValue();
9788     unsigned ActiveBits = 0;
9789     if (Mask.isMask()) {
9790       ActiveBits = Mask.countTrailingOnes();
9791     } else if (Mask.isShiftedMask()) {
9792       ShAmt = Mask.countTrailingZeros();
9793       APInt ShiftedMask = Mask.lshr(ShAmt);
9794       ActiveBits = ShiftedMask.countTrailingOnes();
9795       HasShiftedOffset = true;
9796     } else
9797       return SDValue();
9798 
9799     ExtType = ISD::ZEXTLOAD;
9800     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9801   }
9802 
9803   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9804     SDValue SRL = N0;
9805     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9806       ShAmt = ConstShift->getZExtValue();
9807       unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of the size of ExtVT?
9809       if ((ShAmt & (EVTBits-1)) == 0) {
9810         N0 = N0.getOperand(0);
        // Is the load width a multiple of the size of ExtVT?
9812         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9813           return SDValue();
9814       }
9815 
9816       // At this point, we must have a load or else we can't do the transform.
9817       if (!isa<LoadSDNode>(N0)) return SDValue();
9818 
9819       auto *LN0 = cast<LoadSDNode>(N0);
9820 
9821       // Because a SRL must be assumed to *need* to zero-extend the high bits
9822       // (as opposed to anyext the high bits), we can't combine the zextload
9823       // lowering of SRL and an sextload.
9824       if (LN0->getExtensionType() == ISD::SEXTLOAD)
9825         return SDValue();
9826 
9827       // If the shift amount is larger than the input type then we're not
9828       // accessing any of the loaded bytes.  If the load was a zextload/extload
9829       // then the result of the shift+trunc is zero/undef (handled elsewhere).
9830       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9831         return SDValue();
9832 
9833       // If the SRL is only used by a masking AND, we may be able to adjust
9834       // the ExtVT to make the AND redundant.
9835       SDNode *Mask = *(SRL->use_begin());
9836       if (Mask->getOpcode() == ISD::AND &&
9837           isa<ConstantSDNode>(Mask->getOperand(1))) {
9838         const APInt &ShiftMask =
9839           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9840         if (ShiftMask.isMask()) {
9841           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9842                                            ShiftMask.countTrailingOnes());
9843           // If the mask is smaller, recompute the type.
9844           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9845               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9846             ExtVT = MaskedVT;
9847         }
9848       }
9849     }
9850   }
9851 
9852   // If the load is shifted left (and the result isn't shifted back right),
9853   // we can fold the truncate through the shift.
9854   unsigned ShLeftAmt = 0;
9855   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9856       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9857     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9858       ShLeftAmt = N01->getZExtValue();
9859       N0 = N0.getOperand(0);
9860     }
9861   }
9862 
9863   // If we haven't found a load, we can't narrow it.
9864   if (!isa<LoadSDNode>(N0))
9865     return SDValue();
9866 
9867   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9868   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9869     return SDValue();
9870 
9871   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9872     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9873     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9874     return LVTStoreBits - EVTStoreBits - ShAmt;
9875   };
9876 
9877   // For big endian targets, we need to adjust the offset to the pointer to
9878   // load the correct bytes.
9879   if (DAG.getDataLayout().isBigEndian())
9880     ShAmt = AdjustBigEndianShift(ShAmt);
9881 
9882   EVT PtrType = N0.getOperand(1).getValueType();
9883   uint64_t PtrOff = ShAmt / 8;
9884   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9885   SDLoc DL(LN0);
9886   // The original load itself didn't wrap, so an offset within it doesn't.
9887   SDNodeFlags Flags;
9888   Flags.setNoUnsignedWrap(true);
9889   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9890                                PtrType, LN0->getBasePtr(),
9891                                DAG.getConstant(PtrOff, DL, PtrType),
9892                                Flags);
9893   AddToWorklist(NewPtr.getNode());
9894 
9895   SDValue Load;
9896   if (ExtType == ISD::NON_EXTLOAD)
9897     Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9898                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9899                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9900   else
9901     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9902                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9903                           NewAlign, LN0->getMemOperand()->getFlags(),
9904                           LN0->getAAInfo());
9905 
9906   // Replace the old load's chain with the new load's chain.
9907   WorklistRemover DeadNodes(*this);
9908   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9909 
9910   // Shift the result left, if we've swallowed a left shift.
9911   SDValue Result = Load;
9912   if (ShLeftAmt != 0) {
9913     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9914     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9915       ShImmTy = VT;
9916     // If the shift amount is as large as the result size (but, presumably,
9917     // no larger than the source) then the useful bits of the result are
9918     // zero; we can't simply return the shortened shift, because the result
9919     // of that operation is undefined.
9920     SDLoc DL(N0);
9921     if (ShLeftAmt >= VT.getSizeInBits())
9922       Result = DAG.getConstant(0, DL, VT);
9923     else
9924       Result = DAG.getNode(ISD::SHL, DL, VT,
9925                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9926   }
9927 
9928   if (HasShiftedOffset) {
9929     // Recalculate the shift amount after it has been altered to calculate
9930     // the offset.
9931     if (DAG.getDataLayout().isBigEndian())
9932       ShAmt = AdjustBigEndianShift(ShAmt);
9933 
    // We're using a shifted mask, so the load now has an offset. This means
    // that data has been loaded into lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in
    // the register.
9938     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9939     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9940     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9941   }
9942 
9943   // Return the new loaded value.
9944   return Result;
9945 }
9946 
9947 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9948   SDValue N0 = N->getOperand(0);
9949   SDValue N1 = N->getOperand(1);
9950   EVT VT = N->getValueType(0);
9951   EVT EVT = cast<VTSDNode>(N1)->getVT();
9952   unsigned VTBits = VT.getScalarSizeInBits();
9953   unsigned EVTBits = EVT.getScalarSizeInBits();
9954 
9955   if (N0.isUndef())
9956     return DAG.getUNDEF(VT);
9957 
9958   // fold (sext_in_reg c1) -> c1
9959   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9960     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9961 
9962   // If the input is already sign extended, just drop the extension.
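  // For example, (sext_in_reg (sext X:i8 to i32), i16) is a no-op: the value
  // already has 25 sign bits, more than the 17 this node would enforce.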
9963   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9964     return N0;
9965 
9966   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9967   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9968       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9969     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9970                        N0.getOperand(0), N1);
9971 
9972   // fold (sext_in_reg (sext x)) -> (sext x)
9973   // fold (sext_in_reg (aext x)) -> (sext x)
9974   // if x is small enough or if we know that x has more than 1 sign bit and the
9975   // sign_extend_inreg is extending from one of them.
9976   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9977     SDValue N00 = N0.getOperand(0);
9978     unsigned N00Bits = N00.getScalarValueSizeInBits();
9979     if ((N00Bits <= EVTBits ||
9980          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9981         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9982       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9983   }
9984 
9985   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9986   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9987        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9988        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9989       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9990     if (!LegalOperations ||
9991         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9992       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9993                          N0.getOperand(0));
9994   }
9995 
9996   // fold (sext_in_reg (zext x)) -> (sext x)
9997   // iff we are extending the source sign bit.
9998   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9999     SDValue N00 = N0.getOperand(0);
10000     if (N00.getScalarValueSizeInBits() == EVTBits &&
10001         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10003   }
10004 
10005   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10006   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10007     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10008 
10009   // fold operands of sext_in_reg based on knowledge that the top bits are not
10010   // demanded.
10011   if (SimplifyDemandedBits(SDValue(N, 0)))
10012     return SDValue(N, 0);
10013 
10014   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10015   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10016   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10017     return NarrowLoad;
10018 
10019   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10020   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10021   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10022   if (N0.getOpcode() == ISD::SRL) {
10023     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10024       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
10025         // We can turn this into an SRA iff the input to the SRL is already sign
10026         // extended enough.
10027         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10028         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
10029           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
10030                              N0.getOperand(0), N0.getOperand(1));
10031       }
10032   }
10033 
10034   // fold (sext_inreg (extload x)) -> (sextload x)
10035   // If sextload is not supported by target, we can only do the combine when
10036   // load has one use. Doing otherwise can block folding the extload with other
10037   // extends that the target does support.
10038   if (ISD::isEXTLoad(N0.getNode()) &&
10039       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10040       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10041       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
10042         N0.hasOneUse()) ||
10043        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10044     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10045     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10046                                      LN0->getChain(),
10047                                      LN0->getBasePtr(), EVT,
10048                                      LN0->getMemOperand());
10049     CombineTo(N, ExtLoad);
10050     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10051     AddToWorklist(ExtLoad.getNode());
10052     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10053   }
10054   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10055   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10056       N0.hasOneUse() &&
10057       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10058       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10059        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10060     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10061     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10062                                      LN0->getChain(),
10063                                      LN0->getBasePtr(), EVT,
10064                                      LN0->getMemOperand());
10065     CombineTo(N, ExtLoad);
10066     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10067     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10068   }
10069 
10070   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10071   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10072     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10073                                            N0.getOperand(1), false))
10074       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10075                          BSwap, N1);
10076   }
10077 
10078   return SDValue();
10079 }
10080 
10081 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10082   SDValue N0 = N->getOperand(0);
10083   EVT VT = N->getValueType(0);
10084 
10085   if (N0.isUndef())
10086     return DAG.getUNDEF(VT);
10087 
10088   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10089     return Res;
10090 
10091   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10092     return SDValue(N, 0);
10093 
10094   return SDValue();
10095 }
10096 
10097 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10098   SDValue N0 = N->getOperand(0);
10099   EVT VT = N->getValueType(0);
10100 
10101   if (N0.isUndef())
10102     return DAG.getUNDEF(VT);
10103 
10104   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10105     return Res;
10106 
10107   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10108     return SDValue(N, 0);
10109 
10110   return SDValue();
10111 }
10112 
10113 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10114   SDValue N0 = N->getOperand(0);
10115   EVT VT = N->getValueType(0);
10116   EVT SrcVT = N0.getValueType();
10117   bool isLE = DAG.getDataLayout().isLittleEndian();
10118 
10119   // noop truncate
10120   if (SrcVT == VT)
10121     return N0;
10122 
10123   // fold (truncate (truncate x)) -> (truncate x)
10124   if (N0.getOpcode() == ISD::TRUNCATE)
10125     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10126 
10127   // fold (truncate c1) -> c1
10128   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10129     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10130     if (C.getNode() != N)
10131       return C;
10132   }
10133 
10134   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10135   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10136       N0.getOpcode() == ISD::SIGN_EXTEND ||
10137       N0.getOpcode() == ISD::ANY_EXTEND) {
10138     // if the source is smaller than the dest, we still need an extend.
10139     if (N0.getOperand(0).getValueType().bitsLT(VT))
10140       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
10142     if (N0.getOperand(0).getValueType().bitsGT(VT))
10143       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10144     // if the source and dest are the same type, we can drop both the extend
10145     // and the truncate.
10146     return N0.getOperand(0);
10147   }
10148 
10149   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10150   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10151     return SDValue();
10152 
10153   // Fold extract-and-trunc into a narrow extract. For example:
10154   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10155   //   i32 y = TRUNCATE(i64 x)
10156   //        -- becomes --
10157   //   v16i8 b = BITCAST (v2i64 val)
10158   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10159   //
10160   // Note: We only run this optimization after type legalization (which often
10161   // creates this pattern) and before operation legalization after which
10162   // we need to be more careful about the vector instructions that we generate.
10163   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10164       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10165     EVT VecTy = N0.getOperand(0).getValueType();
10166     EVT ExTy = N0.getValueType();
10167     EVT TrTy = N->getValueType(0);
10168 
10169     unsigned NumElem = VecTy.getVectorNumElements();
10170     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10171 
10172     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10173     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10174 
10175     SDValue EltNo = N0->getOperand(1);
10176     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10177       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10178       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10179       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10180 
10181       SDLoc DL(N);
10182       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10183                          DAG.getBitcast(NVT, N0.getOperand(0)),
10184                          DAG.getConstant(Index, DL, IndexTy));
10185     }
10186   }
10187 
10188   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10189   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10190     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10191         TLI.isTruncateFree(SrcVT, VT)) {
10192       SDLoc SL(N0);
10193       SDValue Cond = N0.getOperand(0);
10194       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10195       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10196       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10197     }
10198   }
10199 
  // trunc (shl x, K) -> shl (trunc x), K  iff  K < VT.getScalarSizeInBits()
10201   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10202       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10203       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10204     SDValue Amt = N0.getOperand(1);
10205     KnownBits Known = DAG.computeKnownBits(Amt);
10206     unsigned Size = VT.getScalarSizeInBits();
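    // The shift amount is provably less than the narrow width when all of
    // its possibly-set bits fit within log2(Size) bits.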
10207     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10208       SDLoc SL(N);
10209       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10210 
10211       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10212       if (AmtVT != Amt.getValueType()) {
10213         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10214         AddToWorklist(Amt.getNode());
10215       }
10216       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10217     }
10218   }
10219 
10220   // Attempt to pre-truncate BUILD_VECTOR sources.
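  // For example, (v2i16 trunc (v2i32 build_vector X, Y)) becomes
  // (v2i16 build_vector (trunc X), (trunc Y)) when the scalar truncates are
  // free.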
10221   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10222       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10223     SDLoc DL(N);
10224     EVT SVT = VT.getScalarType();
10225     SmallVector<SDValue, 8> TruncOps;
10226     for (const SDValue &Op : N0->op_values()) {
10227       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10228       TruncOps.push_back(TruncOp);
10229     }
10230     return DAG.getBuildVector(VT, DL, TruncOps);
10231   }
10232 
10233   // Fold a series of buildvector, bitcast, and truncate if possible.
10234   // For example fold
10235   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10236   //   (2xi32 (buildvector x, y)).
10237   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10238       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10239       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10240       N0.getOperand(0).hasOneUse()) {
10241     SDValue BuildVect = N0.getOperand(0);
10242     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10243     EVT TruncVecEltTy = VT.getVectorElementType();
10244 
10245     // Check that the element types match.
10246     if (BuildVectEltTy == TruncVecEltTy) {
10247       // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts = BuildVect.getNumOperands();
10249       unsigned TruncVecNumElts = VT.getVectorNumElements();
10250       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10251 
10252       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10253              "Invalid number of elements");
10254 
10255       SmallVector<SDValue, 8> Opnds;
10256       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10257         Opnds.push_back(BuildVect.getOperand(i));
10258 
10259       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10260     }
10261   }
10262 
10263   // See if we can simplify the input to this truncate through knowledge that
10264   // only the low bits are being used.
10265   // For example "trunc (or (shl x, 8), y)" // -> trunc y
10266   // Currently we only perform this optimization on scalars because vectors
10267   // may have different active low bits.
10268   if (!VT.isVector()) {
10269     APInt Mask =
10270         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10271     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10272       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10273   }
10274 
10275   // fold (truncate (load x)) -> (smaller load x)
10276   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10277   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10278     if (SDValue Reduced = ReduceLoadWidth(N))
10279       return Reduced;
10280 
10281     // Handle the case where the load remains an extending load even
10282     // after truncation.
10283     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10284       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10285       if (!LN0->isVolatile() &&
10286           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10287         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10288                                          VT, LN0->getChain(), LN0->getBasePtr(),
10289                                          LN0->getMemoryVT(),
10290                                          LN0->getMemOperand());
10291         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10292         return NewLoad;
10293       }
10294     }
10295   }
10296 
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
10298   // where ... are all 'undef'.
10299   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10300     SmallVector<EVT, 8> VTs;
10301     SDValue V;
10302     unsigned Idx = 0;
10303     unsigned NumDefs = 0;
10304 
10305     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10306       SDValue X = N0.getOperand(i);
10307       if (!X.isUndef()) {
10308         V = X;
10309         Idx = i;
10310         NumDefs++;
10311       }
      // Stop if more than one member is non-undef.
10313       if (NumDefs > 1)
10314         break;
10315       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10316                                      VT.getVectorElementType(),
10317                                      X.getValueType().getVectorNumElements()));
10318     }
10319 
10320     if (NumDefs == 0)
10321       return DAG.getUNDEF(VT);
10322 
10323     if (NumDefs == 1) {
10324       assert(V.getNode() && "The single defined operand is empty!");
10325       SmallVector<SDValue, 8> Opnds;
10326       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10327         if (i != Idx) {
10328           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10329           continue;
10330         }
10331         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10332         AddToWorklist(NV.getNode());
10333         Opnds.push_back(NV);
10334       }
10335       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10336     }
10337   }
10338 
10339   // Fold truncate of a bitcast of a vector to an extract of the low vector
10340   // element.
10341   //
10342   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10343   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10344     SDValue VecSrc = N0.getOperand(0);
10345     EVT SrcVT = VecSrc.getValueType();
10346     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10347         (!LegalOperations ||
10348          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10349       SDLoc SL(N);
10350 
10351       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10352       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10353       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10354                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10355     }
10356   }
10357 
10358   // Simplify the operands using demanded-bits information.
10359   if (!VT.isVector() &&
10360       SimplifyDemandedBits(SDValue(N, 0)))
10361     return SDValue(N, 0);
10362 
10363   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10364   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10365   // When the adde's carry is not used.
10366   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10367       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do this for ADDCARRY before operation legalization.
10369       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10370        TLI.isOperationLegal(N0.getOpcode(), VT))) {
10371     SDLoc SL(N);
10372     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10373     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10374     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10375     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10376   }
10377 
10378   // fold (truncate (extract_subvector(ext x))) ->
10379   //      (extract_subvector x)
10380   // TODO: This can be generalized to cover cases where the truncate and extract
10381   // do not fully cancel each other out.
10382   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10383     SDValue N00 = N0.getOperand(0);
10384     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10385         N00.getOpcode() == ISD::ZERO_EXTEND ||
10386         N00.getOpcode() == ISD::ANY_EXTEND) {
10387       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10388           VT.getVectorElementType())
10389         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10390                            N00.getOperand(0), N0.getOperand(1));
10391     }
10392   }
10393 
10394   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10395     return NewVSel;
10396 
10397   // Narrow a suitable binary operation with a non-opaque constant operand by
10398   // moving it ahead of the truncate. This is limited to pre-legalization
10399   // because targets may prefer a wider type during later combines and invert
10400   // this transform.
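  // For example (purely illustrative; profitability is target-dependent):
  //   (i16 (trunc (add i32 X, C))) -> (add (i16 (trunc X)), (i16 (trunc C)))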
10401   switch (N0.getOpcode()) {
10402   case ISD::ADD:
10403   case ISD::SUB:
10404   case ISD::MUL:
10405   case ISD::AND:
10406   case ISD::OR:
10407   case ISD::XOR:
10408     if (!LegalOperations && N0.hasOneUse() &&
10409         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10410          isConstantOrConstantVector(N0.getOperand(1), true))) {
      // TODO: We already restricted this to pre-legalization, but for vectors
      // we are extra cautious not to create an unsupported operation.
      // Target-specific changes are likely needed to avoid regressions here.
10414       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10415         SDLoc DL(N);
10416         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10417         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10418         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10419       }
10420     }
10421   }
10422 
10423   return SDValue();
10424 }
10425 
10426 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10427   SDValue Elt = N->getOperand(i);
10428   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10429     return Elt.getNode();
10430   return Elt.getOperand(Elt.getResNo()).getNode();
10431 }
10432 
10433 /// build_pair (load, load) -> load
10434 /// if load locations are consecutive.
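/// For example, on a little-endian target:
///   (i64 (build_pair (i32 load [p]), (i32 load [p+4]))) -> (i64 load [p])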
10435 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10436   assert(N->getOpcode() == ISD::BUILD_PAIR);
10437 
10438   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10439   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10440 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to account for endianness.
10444   if (DAG.getDataLayout().isBigEndian())
10445     std::swap(LD1, LD2);
10446 
10447   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10448       LD1->getAddressSpace() != LD2->getAddressSpace())
10449     return SDValue();
10450   EVT LD1VT = LD1->getValueType(0);
10451   unsigned LD1Bytes = LD1VT.getStoreSize();
10452   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10453       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10454     unsigned Align = LD1->getAlignment();
10455     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10456         VT.getTypeForEVT(*DAG.getContext()));
10457 
10458     if (NewAlign <= Align &&
10459         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10460       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10461                          LD1->getPointerInfo(), Align);
10462   }
10463 
10464   return SDValue();
10465 }
10466 
10467 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10468   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10469   // and Lo parts; on big-endian machines it doesn't.
10470   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10471 }
10472 
10473 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10474                                     const TargetLowering &TLI) {
10475   // If this is not a bitcast to an FP type or if the target doesn't have
10476   // IEEE754-compliant FP logic, we're done.
10477   EVT VT = N->getValueType(0);
10478   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10479     return SDValue();
10480 
  // TODO: Handle cases where the integer constant is a different scalar
  // bitwidth from the FP type.
10483   SDValue N0 = N->getOperand(0);
10484   EVT SourceVT = N0.getValueType();
10485   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10486     return SDValue();
10487 
10488   unsigned FPOpcode;
10489   APInt SignMask;
10490   switch (N0.getOpcode()) {
10491   case ISD::AND:
10492     FPOpcode = ISD::FABS;
10493     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10494     break;
10495   case ISD::XOR:
10496     FPOpcode = ISD::FNEG;
10497     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10498     break;
10499   case ISD::OR:
10500     FPOpcode = ISD::FABS;
10501     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10502     break;
10503   default:
10504     return SDValue();
10505   }
10506 
10507   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10508   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10509   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10510   //   fneg (fabs X)
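  // e.g. for f32:
  //   (f32 (bitcast (and (i32 (bitcast X)), 0x7fffffff))) -> (fabs X)
  //   (f32 (bitcast (xor (i32 (bitcast X)), 0x80000000))) -> (fneg X)
  //   (f32 (bitcast (or  (i32 (bitcast X)), 0x80000000))) -> (fneg (fabs X))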
10511   SDValue LogicOp0 = N0.getOperand(0);
10512   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10513   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10514       LogicOp0.getOpcode() == ISD::BITCAST &&
10515       LogicOp0.getOperand(0).getValueType() == VT) {
10516     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10517     NumFPLogicOpsConv++;
10518     if (N0.getOpcode() == ISD::OR)
10519       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10520     return FPOp;
10521   }
10522 
10523   return SDValue();
10524 }
10525 
10526 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10527   SDValue N0 = N->getOperand(0);
10528   EVT VT = N->getValueType(0);
10529 
10530   if (N0.isUndef())
10531     return DAG.getUNDEF(VT);
10532 
10533   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before type legalization, unless both types are integer and
  // the scalar type is legal, and only before operation legalization, since
  // the target may be depending on the bitcast.
10537   // First check to see if this is all constant.
10538   // TODO: Support FP bitcasts after legalize types.
10539   if (VT.isVector() &&
10540       (!LegalTypes ||
10541        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
10542         TLI.isTypeLegal(VT.getVectorElementType()))) &&
10543       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10544       cast<BuildVectorSDNode>(N0)->isConstant())
10545     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10546                                              VT.getVectorElementType());
10547 
10548   // If the input is a constant, let getNode fold it.
10549   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
10553     if (!LegalOperations ||
10554         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10555          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10556         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10557          TLI.isOperationLegal(ISD::Constant, VT))) {
10558       SDValue C = DAG.getBitcast(VT, N0);
10559       if (C.getNode() != N)
10560         return C;
10561     }
10562   }
10563 
10564   // (conv (conv x, t1), t2) -> (conv x, t2)
10565   if (N0.getOpcode() == ISD::BITCAST)
10566     return DAG.getBitcast(VT, N0.getOperand(0));
10567 
10568   // fold (conv (load x)) -> (load (conv*)x)
  // This is valid only if the resulting load doesn't need a higher alignment
  // than the original one.
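  // e.g. (f64 (bitcast (i64 load [p]))) -> (f64 load [p])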
10570   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10571       // Do not remove the cast if the types differ in endian layout.
10572       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10573           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10574       // If the load is volatile, we only want to change the load type if the
10575       // resulting load is legal. Otherwise we might increase the number of
10576       // memory accesses. We don't care if the original type was legal or not
10577       // as we assume software couldn't rely on the number of accesses of an
10578       // illegal type.
10579       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10580        TLI.isOperationLegal(ISD::LOAD, VT)) &&
10581       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10582     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10583     unsigned OrigAlign = LN0->getAlignment();
10584 
10585     bool Fast = false;
10586     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10587                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
10588         Fast) {
10589       SDValue Load =
10590           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10591                       LN0->getPointerInfo(), OrigAlign,
10592                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10593       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10594       return Load;
10595     }
10596   }
10597 
10598   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10599     return V;
10600 
10601   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10602   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10603   //
10604   // For ppc_fp128:
10605   // fold (bitcast (fneg x)) ->
10606   //     flipbit = signbit
10607   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10608   //
10609   // fold (bitcast (fabs x)) ->
10610   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
10611   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10612   // This often reduces constant pool loads.
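  // e.g. (i64 (bitcast (fneg f64:X))) ->
  //        (xor (i64 (bitcast X)), 0x8000000000000000)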
10613   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10614        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10615       N0.getNode()->hasOneUse() && VT.isInteger() &&
10616       !VT.isVector() && !N0.getValueType().isVector()) {
10617     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10618     AddToWorklist(NewConv.getNode());
10619 
10620     SDLoc DL(N);
10621     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10622       assert(VT.getSizeInBits() == 128);
10623       SDValue SignBit = DAG.getConstant(
10624           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10625       SDValue FlipBit;
10626       if (N0.getOpcode() == ISD::FNEG) {
10627         FlipBit = SignBit;
10628         AddToWorklist(FlipBit.getNode());
10629       } else {
10630         assert(N0.getOpcode() == ISD::FABS);
10631         SDValue Hi =
10632             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10633                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10634                                               SDLoc(NewConv)));
10635         AddToWorklist(Hi.getNode());
10636         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10637         AddToWorklist(FlipBit.getNode());
10638       }
10639       SDValue FlipBits =
10640           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10641       AddToWorklist(FlipBits.getNode());
10642       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10643     }
10644     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10645     if (N0.getOpcode() == ISD::FNEG)
10646       return DAG.getNode(ISD::XOR, DL, VT,
10647                          NewConv, DAG.getConstant(SignBit, DL, VT));
10648     assert(N0.getOpcode() == ISD::FABS);
10649     return DAG.getNode(ISD::AND, DL, VT,
10650                        NewConv, DAG.getConstant(~SignBit, DL, VT));
10651   }
10652 
10653   // fold (bitconvert (fcopysign cst, x)) ->
10654   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
10655   // Note that we don't handle (copysign x, cst) because this can always be
10656   // folded to an fneg or fabs.
10657   //
10658   // For ppc_fp128:
10659   // fold (bitcast (fcopysign cst, x)) ->
10660   //     flipbit = (and (extract_element
10661   //                     (xor (bitcast cst), (bitcast x)), 0),
10662   //                    signbit)
10663   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
10664   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10665       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10666       VT.isInteger() && !VT.isVector()) {
10667     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10668     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10669     if (isTypeLegal(IntXVT)) {
10670       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10671       AddToWorklist(X.getNode());
10672 
10673       // If X has a different width than the result/lhs, sext it or truncate it.
10674       unsigned VTWidth = VT.getSizeInBits();
10675       if (OrigXWidth < VTWidth) {
10676         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10677         AddToWorklist(X.getNode());
10678       } else if (OrigXWidth > VTWidth) {
10679         // To get the sign bit in the right place, we have to shift it right
10680         // before truncating.
10681         SDLoc DL(X);
10682         X = DAG.getNode(ISD::SRL, DL,
10683                         X.getValueType(), X,
10684                         DAG.getConstant(OrigXWidth-VTWidth, DL,
10685                                         X.getValueType()));
10686         AddToWorklist(X.getNode());
10687         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10688         AddToWorklist(X.getNode());
10689       }
10690 
10691       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10692         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10693         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10694         AddToWorklist(Cst.getNode());
10695         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10696         AddToWorklist(X.getNode());
10697         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10698         AddToWorklist(XorResult.getNode());
10699         SDValue XorResult64 = DAG.getNode(
10700             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10701             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10702                                   SDLoc(XorResult)));
10703         AddToWorklist(XorResult64.getNode());
10704         SDValue FlipBit =
10705             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10706                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10707         AddToWorklist(FlipBit.getNode());
10708         SDValue FlipBits =
10709             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10710         AddToWorklist(FlipBits.getNode());
10711         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10712       }
10713       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10714       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10715                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
10716       AddToWorklist(X.getNode());
10717 
10718       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10719       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10720                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10721       AddToWorklist(Cst.getNode());
10722 
10723       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10724     }
10725   }
10726 
10727   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10728   if (N0.getOpcode() == ISD::BUILD_PAIR)
10729     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10730       return CombineLD;
10731 
10732   // Remove double bitcasts from shuffles - this is often a legacy of
10733   // XformToShuffleWithZero being used to combine bitmaskings (of
10734   // float vectors bitcast to integer vectors) into shuffles.
10735   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10736   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10737       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10738       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10739       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10740     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10741 
    // If an operand is a bitcast, peek through it when it casts from the
    // original VT. If an operand is a constant, just bitcast it back to the
    // original VT.
10744     auto PeekThroughBitcast = [&](SDValue Op) {
10745       if (Op.getOpcode() == ISD::BITCAST &&
10746           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
10748       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10749           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10750         return DAG.getBitcast(VT, Op);
10751       return SDValue();
10752     };
10753 
10754     // FIXME: If either input vector is bitcast, try to convert the shuffle to
10755     // the result type of this bitcast. This would eliminate at least one
10756     // bitcast. See the transform in InstCombine.
10757     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10758     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10759     if (!(SV0 && SV1))
10760       return SDValue();
10761 
10762     int MaskScale =
10763         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10764     SmallVector<int, 8> NewMask;
10765     for (int M : SVN->getMask())
10766       for (int i = 0; i != MaskScale; ++i)
10767         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
10768 
10769     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10770     if (!LegalMask) {
10771       std::swap(SV0, SV1);
10772       ShuffleVectorSDNode::commuteMask(NewMask);
10773       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10774     }
10775 
10776     if (LegalMask)
10777       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10778   }
10779 
10780   return SDValue();
10781 }
10782 
10783 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10784   EVT VT = N->getValueType(0);
10785   return CombineConsecutiveLoads(N, VT);
10786 }
10787 
10788 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10789 /// operands. DstEltVT indicates the destination element value type.
10790 SDValue DAGCombiner::
10791 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10792   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10793 
10794   // If this is already the right type, we're done.
10795   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10796 
10797   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10798   unsigned DstBitSize = DstEltVT.getSizeInBits();
10799 
10800   // If this is a conversion of N elements of one type to N elements of another
10801   // type, convert each element.  This handles FP<->INT cases.
10802   if (SrcBitSize == DstBitSize) {
10803     SmallVector<SDValue, 8> Ops;
10804     for (SDValue Op : BV->op_values()) {
10805       // If the vector element type is not legal, the BUILD_VECTOR operands
10806       // are promoted and implicitly truncated.  Make that explicit here.
10807       if (Op.getValueType() != SrcEltVT)
10808         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10809       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10810       AddToWorklist(Ops.back().getNode());
10811     }
10812     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10813                               BV->getValueType(0).getVectorNumElements());
10814     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10815   }
10816 
10817   // Otherwise, we're growing or shrinking the elements.  To avoid having to
10818   // handle annoying details of growing/shrinking FP values, we convert them to
10819   // int first.
10820   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an integer vector whose elements have
    // the same size.
10823     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10824     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10825     SrcEltVT = IntVT;
10826   }
10827 
10828   // Now we know the input is an integer vector.  If the output is a FP type,
10829   // convert to integer first, then to FP of the right size.
10830   if (DstEltVT.isFloatingPoint()) {
10831     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10832     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10833 
10834     // Next, convert to FP elements of the same size.
10835     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10836   }
10837 
10838   SDLoc DL(BV);
10839 
  // Okay, we know the src/dst types are both integers of differing sizes.
  // Handle growing first.
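  // e.g. growing v2i16 <0x1111, 0x2222> to i32 elements gives <0x22221111>
  // on a little-endian target and <0x11112222> on a big-endian one.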
10842   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10843   if (SrcBitSize < DstBitSize) {
10844     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10845 
10846     SmallVector<SDValue, 8> Ops;
10847     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10848          i += NumInputsPerOutput) {
10849       bool isLE = DAG.getDataLayout().isLittleEndian();
10850       APInt NewBits = APInt(DstBitSize, 0);
10851       bool EltIsUndef = true;
10852       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10853         // Shift the previously computed bits over.
10854         NewBits <<= SrcBitSize;
10855         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10856         if (Op.isUndef()) continue;
10857         EltIsUndef = false;
10858 
10859         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10860                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
10861       }
10862 
10863       if (EltIsUndef)
10864         Ops.push_back(DAG.getUNDEF(DstEltVT));
10865       else
10866         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10867     }
10868 
10869     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10870     return DAG.getBuildVector(VT, DL, Ops);
10871   }
10872 
10873   // Finally, this must be the case where we are shrinking elements: each input
10874   // turns into multiple outputs.
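  // e.g. shrinking v1i32 <0x22221111> to i16 elements gives <0x1111, 0x2222>
  // on a little-endian target and <0x2222, 0x1111> on a big-endian one.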
10875   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10876   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10877                             NumOutputsPerInput*BV->getNumOperands());
10878   SmallVector<SDValue, 8> Ops;
10879 
10880   for (const SDValue &Op : BV->op_values()) {
10881     if (Op.isUndef()) {
10882       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10883       continue;
10884     }
10885 
10886     APInt OpVal = cast<ConstantSDNode>(Op)->
10887                   getAPIntValue().zextOrTrunc(SrcBitSize);
10888 
10889     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10890       APInt ThisVal = OpVal.trunc(DstBitSize);
10891       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10892       OpVal.lshrInPlace(DstBitSize);
10893     }
10894 
10895     // For big endian targets, swap the order of the pieces of each element.
10896     if (DAG.getDataLayout().isBigEndian())
10897       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10898   }
10899 
10900   return DAG.getBuildVector(VT, DL, Ops);
10901 }
10902 
10903 static bool isContractable(SDNode *N) {
10904   SDNodeFlags F = N->getFlags();
10905   return F.hasAllowContract() || F.hasAllowReassociation();
10906 }
10907 
10908 /// Try to perform FMA combining on a given FADD node.
10909 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10910   SDValue N0 = N->getOperand(0);
10911   SDValue N1 = N->getOperand(1);
10912   EVT VT = N->getValueType(0);
10913   SDLoc SL(N);
10914 
10915   const TargetOptions &Options = DAG.getTarget().Options;
10916 
10917   // Floating-point multiply-add with intermediate rounding.
10918   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10919 
10920   // Floating-point multiply-add without intermediate rounding.
10921   bool HasFMA =
10922       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10923       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10924 
10925   // No valid opcode, do not combine.
10926   if (!HasFMAD && !HasFMA)
10927     return SDValue();
10928 
10929   SDNodeFlags Flags = N->getFlags();
10930   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10931   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10932                               CanFuse || HasFMAD);
10933   // If the addition is not contractable, do not combine.
10934   if (!AllowFusionGlobally && !isContractable(N))
10935     return SDValue();
10936 
10937   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10938   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10939     return SDValue();
10940 
10941   // Always prefer FMAD to FMA for precision.
10942   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10943   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10944 
  // Returns true if the node is an FMUL that is contractable, either due to
  // global flags or its own SDNodeFlags.
10947   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10948     if (N.getOpcode() != ISD::FMUL)
10949       return false;
10950     return AllowFusionGlobally || isContractable(N.getNode());
10951   };
10952   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10953   // prefer to fold the multiply with fewer uses.
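  // e.g. if (fmul u, v) has other uses but (fmul x, y) does not, fold to
  // (fma x, y, (fmul u, v)) so the multiply that must be computed anyway
  // keeps its remaining users.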
10954   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10955     if (N0.getNode()->use_size() > N1.getNode()->use_size())
10956       std::swap(N0, N1);
10957   }
10958 
10959   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10960   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10961     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10962                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
10963   }
10964 
10965   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10966   // Note: Commutes FADD operands.
10967   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10968     return DAG.getNode(PreferredFusedOpcode, SL, VT,
10969                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
10970   }
10971 
10972   // Look through FP_EXTEND nodes to do more combining.
10973 
10974   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10975   if (N0.getOpcode() == ISD::FP_EXTEND) {
10976     SDValue N00 = N0.getOperand(0);
10977     if (isContractableFMUL(N00) &&
10978         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10979       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10980                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10981                                      N00.getOperand(0)),
10982                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10983                                      N00.getOperand(1)), N1, Flags);
10984     }
10985   }
10986 
10987   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10988   // Note: Commutes FADD operands.
10989   if (N1.getOpcode() == ISD::FP_EXTEND) {
10990     SDValue N10 = N1.getOperand(0);
10991     if (isContractableFMUL(N10) &&
10992         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10993       return DAG.getNode(PreferredFusedOpcode, SL, VT,
10994                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10995                                      N10.getOperand(0)),
10996                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
10997                                      N10.getOperand(1)), N0, Flags);
10998     }
10999   }
11000 
11001   // More folding opportunities when target permits.
11002   if (Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
11004     if (CanFuse &&
11005         N0.getOpcode() == PreferredFusedOpcode &&
11006         N0.getOperand(2).getOpcode() == ISD::FMUL &&
11007         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11008       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11009                          N0.getOperand(0), N0.getOperand(1),
11010                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11011                                      N0.getOperand(2).getOperand(0),
11012                                      N0.getOperand(2).getOperand(1),
11013                                      N1, Flags), Flags);
11014     }
11015 
    // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
11017     if (CanFuse &&
11018         N1->getOpcode() == PreferredFusedOpcode &&
11019         N1.getOperand(2).getOpcode() == ISD::FMUL &&
11020         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11021       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11022                          N1.getOperand(0), N1.getOperand(1),
11023                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11024                                      N1.getOperand(2).getOperand(0),
11025                                      N1.getOperand(2).getOperand(1),
11026                                      N0, Flags), Flags);
11027     }
11028 
11029 
11030     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11031     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
11032     auto FoldFAddFMAFPExtFMul = [&] (
11033       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11034       SDNodeFlags Flags) {
11035       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11036                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11037                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11038                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11039                                      Z, Flags), Flags);
11040     };
11041     if (N0.getOpcode() == PreferredFusedOpcode) {
11042       SDValue N02 = N0.getOperand(2);
11043       if (N02.getOpcode() == ISD::FP_EXTEND) {
11044         SDValue N020 = N02.getOperand(0);
11045         if (isContractableFMUL(N020) &&
11046             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11047           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11048                                       N020.getOperand(0), N020.getOperand(1),
11049                                       N1, Flags);
11050         }
11051       }
11052     }
11053 
11054     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11055     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11056     // FIXME: This turns two single-precision and one double-precision
11057     // operation into two double-precision operations, which might not be
11058     // interesting for all targets, especially GPUs.
11059     auto FoldFAddFPExtFMAFMul = [&] (
11060       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11061       SDNodeFlags Flags) {
11062       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11063                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11064                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11065                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11066                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11067                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11068                                      Z, Flags), Flags);
11069     };
11070     if (N0.getOpcode() == ISD::FP_EXTEND) {
11071       SDValue N00 = N0.getOperand(0);
11072       if (N00.getOpcode() == PreferredFusedOpcode) {
11073         SDValue N002 = N00.getOperand(2);
11074         if (isContractableFMUL(N002) &&
11075             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11076           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11077                                       N002.getOperand(0), N002.getOperand(1),
11078                                       N1, Flags);
11079         }
11080       }
11081     }
11082 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
11084     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
11085     if (N1.getOpcode() == PreferredFusedOpcode) {
11086       SDValue N12 = N1.getOperand(2);
11087       if (N12.getOpcode() == ISD::FP_EXTEND) {
11088         SDValue N120 = N12.getOperand(0);
11089         if (isContractableFMUL(N120) &&
11090             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11091           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11092                                       N120.getOperand(0), N120.getOperand(1),
11093                                       N0, Flags);
11094         }
11095       }
11096     }
11097 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
11099     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11100     // FIXME: This turns two single-precision and one double-precision
11101     // operation into two double-precision operations, which might not be
11102     // interesting for all targets, especially GPUs.
11103     if (N1.getOpcode() == ISD::FP_EXTEND) {
11104       SDValue N10 = N1.getOperand(0);
11105       if (N10.getOpcode() == PreferredFusedOpcode) {
11106         SDValue N102 = N10.getOperand(2);
11107         if (isContractableFMUL(N102) &&
11108             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11109           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11110                                       N102.getOperand(0), N102.getOperand(1),
11111                                       N0, Flags);
11112         }
11113       }
11114     }
11115   }
11116 
11117   return SDValue();
11118 }
11119 
11120 /// Try to perform FMA combining on a given FSUB node.
11121 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11122   SDValue N0 = N->getOperand(0);
11123   SDValue N1 = N->getOperand(1);
11124   EVT VT = N->getValueType(0);
11125   SDLoc SL(N);
11126 
11127   const TargetOptions &Options = DAG.getTarget().Options;
11128   // Floating-point multiply-add with intermediate rounding.
11129   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11130 
11131   // Floating-point multiply-add without intermediate rounding.
11132   bool HasFMA =
11133       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11134       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11135 
11136   // No valid opcode, do not combine.
11137   if (!HasFMAD && !HasFMA)
11138     return SDValue();
11139 
11140   const SDNodeFlags Flags = N->getFlags();
11141   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11142   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11143                               CanFuse || HasFMAD);
11144 
11145   // If the subtraction is not contractable, do not combine.
11146   if (!AllowFusionGlobally && !isContractable(N))
11147     return SDValue();
11148 
11149   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11150   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11151     return SDValue();
11152 
11153   // Always prefer FMAD to FMA for precision.
11154   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11155   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11156 
  // Returns true if the node is an FMUL that is contractable, either due to
  // global flags or its own SDNodeFlags.
11159   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11160     if (N.getOpcode() != ISD::FMUL)
11161       return false;
11162     return AllowFusionGlobally || isContractable(N.getNode());
11163   };
11164 
11165   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11166   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11167     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11168                        N0.getOperand(0), N0.getOperand(1),
11169                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11170   }
11171 
11172   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11173   // Note: Commutes FSUB operands.
11174   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11175     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11176                        DAG.getNode(ISD::FNEG, SL, VT,
11177                                    N1.getOperand(0)),
11178                        N1.getOperand(1), N0, Flags);
11179   }
11180 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
11182   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11183       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11184     SDValue N00 = N0.getOperand(0).getOperand(0);
11185     SDValue N01 = N0.getOperand(0).getOperand(1);
11186     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11187                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11188                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11189   }
11190 
11191   // Look through FP_EXTEND nodes to do more combining.
11192 
11193   // fold (fsub (fpext (fmul x, y)), z)
11194   //   -> (fma (fpext x), (fpext y), (fneg z))
11195   if (N0.getOpcode() == ISD::FP_EXTEND) {
11196     SDValue N00 = N0.getOperand(0);
11197     if (isContractableFMUL(N00) &&
11198         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11199       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11200                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11201                                      N00.getOperand(0)),
11202                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11203                                      N00.getOperand(1)),
11204                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11205     }
11206   }
11207 
11208   // fold (fsub x, (fpext (fmul y, z)))
11209   //   -> (fma (fneg (fpext y)), (fpext z), x)
11210   // Note: Commutes FSUB operands.
11211   if (N1.getOpcode() == ISD::FP_EXTEND) {
11212     SDValue N10 = N1.getOperand(0);
11213     if (isContractableFMUL(N10) &&
11214         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11215       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11216                          DAG.getNode(ISD::FNEG, SL, VT,
11217                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
11218                                                  N10.getOperand(0))),
11219                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11220                                      N10.getOperand(1)),
11221                          N0, Flags);
11222     }
11223   }
11224 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
11231   if (N0.getOpcode() == ISD::FP_EXTEND) {
11232     SDValue N00 = N0.getOperand(0);
11233     if (N00.getOpcode() == ISD::FNEG) {
11234       SDValue N000 = N00.getOperand(0);
11235       if (isContractableFMUL(N000) &&
11236           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11237         return DAG.getNode(ISD::FNEG, SL, VT,
11238                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11239                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11240                                                    N000.getOperand(0)),
11241                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11242                                                    N000.getOperand(1)),
11243                                        N1, Flags));
11244       }
11245     }
11246   }
11247 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
11254   if (N0.getOpcode() == ISD::FNEG) {
11255     SDValue N00 = N0.getOperand(0);
11256     if (N00.getOpcode() == ISD::FP_EXTEND) {
11257       SDValue N000 = N00.getOperand(0);
11258       if (isContractableFMUL(N000) &&
11259           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11260         return DAG.getNode(ISD::FNEG, SL, VT,
11261                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11262                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11263                                                    N000.getOperand(0)),
11264                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11265                                                    N000.getOperand(1)),
11266                                        N1, Flags));
11267       }
11268     }
11269   }
11270 
11271   // More folding opportunities when target permits.
11272   if (Aggressive) {
11273     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
11275     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11276         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11277         N0.getOperand(2)->hasOneUse()) {
11278       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11279                          N0.getOperand(0), N0.getOperand(1),
11280                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11281                                      N0.getOperand(2).getOperand(0),
11282                                      N0.getOperand(2).getOperand(1),
11283                                      DAG.getNode(ISD::FNEG, SL, VT,
11284                                                  N1), Flags), Flags);
11285     }
11286 
11287     // fold (fsub x, (fma y, z, (fmul u, v)))
11288     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
11289     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11290         isContractableFMUL(N1.getOperand(2))) {
11291       SDValue N20 = N1.getOperand(2).getOperand(0);
11292       SDValue N21 = N1.getOperand(2).getOperand(1);
11293       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11294                          DAG.getNode(ISD::FNEG, SL, VT,
11295                                      N1.getOperand(0)),
11296                          N1.getOperand(1),
11297                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11298                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11299                                      N21, N0, Flags), Flags);
11300     }
11301 
11302 
11303     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
11305     if (N0.getOpcode() == PreferredFusedOpcode) {
11306       SDValue N02 = N0.getOperand(2);
11307       if (N02.getOpcode() == ISD::FP_EXTEND) {
11308         SDValue N020 = N02.getOperand(0);
11309         if (isContractableFMUL(N020) &&
11310             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11311           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11312                              N0.getOperand(0), N0.getOperand(1),
11313                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11314                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11315                                                      N020.getOperand(0)),
11316                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11317                                                      N020.getOperand(1)),
11318                                          DAG.getNode(ISD::FNEG, SL, VT,
11319                                                      N1), Flags), Flags);
11320         }
11321       }
11322     }
11323 
11324     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11325     //   -> (fma (fpext x), (fpext y),
11326     //           (fma (fpext u), (fpext v), (fneg z)))
11327     // FIXME: This turns two single-precision and one double-precision
11328     // operation into two double-precision operations, which might not be
11329     // interesting for all targets, especially GPUs.
11330     if (N0.getOpcode() == ISD::FP_EXTEND) {
11331       SDValue N00 = N0.getOperand(0);
11332       if (N00.getOpcode() == PreferredFusedOpcode) {
11333         SDValue N002 = N00.getOperand(2);
11334         if (isContractableFMUL(N002) &&
11335             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11336           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11337                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11338                                          N00.getOperand(0)),
11339                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11340                                          N00.getOperand(1)),
11341                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11342                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11343                                                      N002.getOperand(0)),
11344                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11345                                                      N002.getOperand(1)),
11346                                          DAG.getNode(ISD::FNEG, SL, VT,
11347                                                      N1), Flags), Flags);
11348         }
11349       }
11350     }
11351 
11352     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11353     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11354     if (N1.getOpcode() == PreferredFusedOpcode &&
11355         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11356       SDValue N120 = N1.getOperand(2).getOperand(0);
11357       if (isContractableFMUL(N120) &&
11358           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11359         SDValue N1200 = N120.getOperand(0);
11360         SDValue N1201 = N120.getOperand(1);
11361         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11362                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11363                            N1.getOperand(1),
11364                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11365                                        DAG.getNode(ISD::FNEG, SL, VT,
11366                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11367                                                                VT, N1200)),
11368                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11369                                                    N1201),
11370                                        N0, Flags), Flags);
11371       }
11372     }
11373 
11374     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11375     //   -> (fma (fneg (fpext y)), (fpext z),
11376     //           (fma (fneg (fpext u)), (fpext v), x))
11377     // FIXME: This turns two single-precision and one double-precision
11378     // operation into two double-precision operations, which might not be
11379     // interesting for all targets, especially GPUs.
11380     if (N1.getOpcode() == ISD::FP_EXTEND &&
11381         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11382       SDValue CvtSrc = N1.getOperand(0);
11383       SDValue N100 = CvtSrc.getOperand(0);
11384       SDValue N101 = CvtSrc.getOperand(1);
11385       SDValue N102 = CvtSrc.getOperand(2);
11386       if (isContractableFMUL(N102) &&
11387           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11388         SDValue N1020 = N102.getOperand(0);
11389         SDValue N1021 = N102.getOperand(1);
11390         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11391                            DAG.getNode(ISD::FNEG, SL, VT,
11392                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11393                                                    N100)),
11394                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11395                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11396                                        DAG.getNode(ISD::FNEG, SL, VT,
11397                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11398                                                                VT, N1020)),
11399                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11400                                                    N1021),
11401                                        N0, Flags), Flags);
11402       }
11403     }
11404   }
11405 
11406   return SDValue();
11407 }
11408 
11409 /// Try to perform FMA combining on a given FMUL node based on the distributive
11410 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11411 /// subtraction instead of addition).
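/// For example: (fmul (fadd x, 1.0), y) -> (fma x, y, y) and
/// (fmul (fsub x, 1.0), y) -> (fma x, y, (fneg y)).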
11412 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11413   SDValue N0 = N->getOperand(0);
11414   SDValue N1 = N->getOperand(1);
11415   EVT VT = N->getValueType(0);
11416   SDLoc SL(N);
11417   const SDNodeFlags Flags = N->getFlags();
11418 
11419   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11420 
11421   const TargetOptions &Options = DAG.getTarget().Options;
11422 
11423   // The transforms below are incorrect when x == 0 and y == inf, because the
11424   // intermediate multiplication produces a nan.
11425   if (!Options.NoInfsFPMath)
11426     return SDValue();
11427 
11428   // Floating-point multiply-add without intermediate rounding.
11429   bool HasFMA =
11430       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11431       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11432       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11433 
11434   // Floating-point multiply-add with intermediate rounding. This can result
11435   // in a less precise result due to the changed rounding order.
11436   bool HasFMAD = Options.UnsafeFPMath &&
11437                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11438 
11439   // No valid opcode, do not combine.
11440   if (!HasFMAD && !HasFMA)
11441     return SDValue();
11442 
11443   // Always prefer FMAD to FMA for precision.
11444   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11445   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11446 
11447   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11448   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11449   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11450     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11451       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11452         if (C->isExactlyValue(+1.0))
11453           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11454                              Y, Flags);
11455         if (C->isExactlyValue(-1.0))
11456           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11457                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11458       }
11459     }
11460     return SDValue();
11461   };
11462 
11463   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11464     return FMA;
11465   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11466     return FMA;
11467 
11468   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11469   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11470   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11471   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11472   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11473     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11474       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11475         if (C0->isExactlyValue(+1.0))
11476           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11477                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11478                              Y, Flags);
11479         if (C0->isExactlyValue(-1.0))
11480           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11481                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11482                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11483       }
11484       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11485         if (C1->isExactlyValue(+1.0))
11486           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11487                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11488         if (C1->isExactlyValue(-1.0))
11489           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11490                              Y, Flags);
11491       }
11492     }
11493     return SDValue();
11494   };
11495 
11496   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11497     return FMA;
11498   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11499     return FMA;
11500 
11501   return SDValue();
11502 }
11503 
11504 SDValue DAGCombiner::visitFADD(SDNode *N) {
11505   SDValue N0 = N->getOperand(0);
11506   SDValue N1 = N->getOperand(1);
11507   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11508   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11509   EVT VT = N->getValueType(0);
11510   SDLoc DL(N);
11511   const TargetOptions &Options = DAG.getTarget().Options;
11512   const SDNodeFlags Flags = N->getFlags();
11513 
11514   // fold vector ops
11515   if (VT.isVector())
11516     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11517       return FoldedVOp;
11518 
11519   // fold (fadd c1, c2) -> c1 + c2
11520   if (N0CFP && N1CFP)
11521     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11522 
11523   // canonicalize constant to RHS
11524   if (N0CFP && !N1CFP)
11525     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11526 
11527   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11528   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11529   if (N1C && N1C->isZero())
11530     if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11531       return N0;
11532 
11533   if (SDValue NewSel = foldBinOpIntoSelect(N))
11534     return NewSel;
11535 
11536   // fold (fadd A, (fneg B)) -> (fsub A, B)
11537   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11538       isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11539     return DAG.getNode(ISD::FSUB, DL, VT, N0,
11540                        GetNegatedExpression(N1, DAG, LegalOperations,
11541                                             ForCodeSize), Flags);
11542 
11543   // fold (fadd (fneg A), B) -> (fsub B, A)
11544   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11545       isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11546     return DAG.getNode(ISD::FSUB, DL, VT, N1,
11547                        GetNegatedExpression(N0, DAG, LegalOperations,
11548                                             ForCodeSize), Flags);
11549 
11550   auto isFMulNegTwo = [](SDValue FMul) {
11551     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11552       return false;
11553     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11554     return C && C->isExactlyValue(-2.0);
11555   };
11556 
11557   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11558   if (isFMulNegTwo(N0)) {
11559     SDValue B = N0.getOperand(0);
11560     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11561     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11562   }
11563   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11564   if (isFMulNegTwo(N1)) {
11565     SDValue B = N1.getOperand(0);
11566     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11567     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11568   }
11569 
  // No FP constant should be created after legalization as the Instruction
  // Selection pass has a hard time dealing with FP constants.
11572   bool AllowNewConst = (Level < AfterLegalizeDAG);
11573 
11574   // If 'unsafe math' or nnan is enabled, fold lots of things.
11575   if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11576     // If allowed, fold (fadd (fneg x), x) -> 0.0
11577     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11578       return DAG.getConstantFP(0.0, DL, VT);
11579 
11580     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11581     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11582       return DAG.getConstantFP(0.0, DL, VT);
11583   }
11584 
  // If 'unsafe math' is enabled, or both reassoc and nsz are set, fold lots of
  // things.
  // TODO: break out the portions of the transformations below where Unsafe is
  //       considered but which do not require both nsz and reassoc.
11588   if ((Options.UnsafeFPMath ||
11589        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11590       AllowNewConst) {
11591     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11592     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11593         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11594       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11595       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11596     }
11597 
11598     // We can fold chains of FADD's of the same value into multiplications.
11599     // This transform is not safe in general because we are reducing the number
11600     // of rounding steps.
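    // Illustrative example: (fadd (fmul x, c), x) computes fl(fl(c*x) + x),
    // while (fmul x, c+1) computes fl(fl(c+1)*x); for a constant c that is
    // inexact in binary (e.g. 0.1), the results can differ.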
11601     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11602       if (N0.getOpcode() == ISD::FMUL) {
11603         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11604         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11605 
11606         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11607         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11608           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11609                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11610           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11611         }
11612 
11613         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11614         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11615             N1.getOperand(0) == N1.getOperand(1) &&
11616             N0.getOperand(0) == N1.getOperand(0)) {
11617           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11618                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11619           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11620         }
11621       }
11622 
11623       if (N1.getOpcode() == ISD::FMUL) {
11624         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11625         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11626 
11627         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11628         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11629           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11630                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11631           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11632         }
11633 
11634         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11635         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11636             N0.getOperand(0) == N0.getOperand(1) &&
11637             N1.getOperand(0) == N0.getOperand(0)) {
11638           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11639                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11640           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11641         }
11642       }
11643 
11644       if (N0.getOpcode() == ISD::FADD) {
11645         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11646         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11647         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11648             (N0.getOperand(0) == N1)) {
11649           return DAG.getNode(ISD::FMUL, DL, VT,
11650                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11651         }
11652       }
11653 
11654       if (N1.getOpcode() == ISD::FADD) {
11655         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11656         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11657         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11658             N1.getOperand(0) == N0) {
11659           return DAG.getNode(ISD::FMUL, DL, VT,
11660                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11661         }
11662       }
11663 
11664       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11665       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11666           N0.getOperand(0) == N0.getOperand(1) &&
11667           N1.getOperand(0) == N1.getOperand(1) &&
11668           N0.getOperand(0) == N1.getOperand(0)) {
11669         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11670                            DAG.getConstantFP(4.0, DL, VT), Flags);
11671       }
11672     }
11673   } // enable-unsafe-fp-math
11674 
11675   // FADD -> FMA combines:
11676   if (SDValue Fused = visitFADDForFMACombine(N)) {
11677     AddToWorklist(Fused.getNode());
11678     return Fused;
11679   }
11680   return SDValue();
11681 }
11682 
11683 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11684   SDValue N0 = N->getOperand(0);
11685   SDValue N1 = N->getOperand(1);
11686   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11687   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11688   EVT VT = N->getValueType(0);
11689   SDLoc DL(N);
11690   const TargetOptions &Options = DAG.getTarget().Options;
11691   const SDNodeFlags Flags = N->getFlags();
11692 
11693   // fold vector ops
11694   if (VT.isVector())
11695     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11696       return FoldedVOp;
11697 
11698   // fold (fsub c1, c2) -> c1-c2
11699   if (N0CFP && N1CFP)
11700     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11701 
11702   if (SDValue NewSel = foldBinOpIntoSelect(N))
11703     return NewSel;
11704 
11705   // (fsub A, 0) -> A
11706   if (N1CFP && N1CFP->isZero()) {
11707     if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11708         Flags.hasNoSignedZeros()) {
11709       return N0;
11710     }
11711   }
11712 
11713   if (N0 == N1) {
11714     // (fsub x, x) -> 0.0
11715     if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11716       return DAG.getConstantFP(0.0f, DL, VT);
11717   }
11718 
11719   // (fsub -0.0, N1) -> -N1
11720   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
11721   //       FSUB does not specify the sign bit of a NaN. Also note that for
11722   //       the same reason, the inverse transform is not safe, unless fast math
11723   //       flags are in play.
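  //       For instance, FNEG must flip the sign bit even of a NaN, while
  //       FSUB(-0.0, NaN) may return the NaN with either sign, so rewriting
  //       an FNEG as an FSUB could lose the sign-bit flip.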
11724   if (N0CFP && N0CFP->isZero()) {
11725     if (N0CFP->isNegative() ||
11726         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11727       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11728         return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
11729       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11730         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11731     }
11732   }
11733 
11734   if ((Options.UnsafeFPMath ||
11735       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11736       && N1.getOpcode() == ISD::FADD) {
11737     // X - (X + Y) -> -Y
11738     if (N0 == N1->getOperand(0))
11739       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11740     // X - (Y + X) -> -Y
11741     if (N0 == N1->getOperand(1))
11742       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11743   }
11744 
11745   // fold (fsub A, (fneg B)) -> (fadd A, B)
11746   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11747     return DAG.getNode(ISD::FADD, DL, VT, N0,
11748                        GetNegatedExpression(N1, DAG, LegalOperations,
11749                                             ForCodeSize), Flags);
11750 
11751   // FSUB -> FMA combines:
11752   if (SDValue Fused = visitFSUBForFMACombine(N)) {
11753     AddToWorklist(Fused.getNode());
11754     return Fused;
11755   }
11756 
11757   return SDValue();
11758 }
11759 
11760 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11761   SDValue N0 = N->getOperand(0);
11762   SDValue N1 = N->getOperand(1);
11763   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11764   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11765   EVT VT = N->getValueType(0);
11766   SDLoc DL(N);
11767   const TargetOptions &Options = DAG.getTarget().Options;
11768   const SDNodeFlags Flags = N->getFlags();
11769 
11770   // fold vector ops
11771   if (VT.isVector()) {
11772     // This just handles C1 * C2 for vectors. Other vector folds are below.
11773     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11774       return FoldedVOp;
11775   }
11776 
11777   // fold (fmul c1, c2) -> c1*c2
11778   if (N0CFP && N1CFP)
11779     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11780 
11781   // canonicalize constant to RHS
11782   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11783      !isConstantFPBuildVectorOrConstantFP(N1))
11784     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11785 
11786   // fold (fmul A, 1.0) -> A
11787   if (N1CFP && N1CFP->isExactlyValue(1.0))
11788     return N0;
11789 
11790   if (SDValue NewSel = foldBinOpIntoSelect(N))
11791     return NewSel;
11792 
11793   if (Options.UnsafeFPMath ||
11794       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11795     // fold (fmul A, 0) -> 0
11796     if (N1CFP && N1CFP->isZero())
11797       return N1;
11798   }
11799 
11800   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11801     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11802     if (isConstantFPBuildVectorOrConstantFP(N1) &&
11803         N0.getOpcode() == ISD::FMUL) {
11804       SDValue N00 = N0.getOperand(0);
11805       SDValue N01 = N0.getOperand(1);
11806       // Avoid an infinite loop by making sure that N00 is not a constant
11807       // (the inner multiply has not been constant folded yet).
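      // (Without this guard, the new outer fmul could match this same pattern
      //  again before the inner constants are folded, looping indefinitely.)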
11808       if (isConstantFPBuildVectorOrConstantFP(N01) &&
11809           !isConstantFPBuildVectorOrConstantFP(N00)) {
11810         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11811         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11812       }
11813     }
11814 
    // Match a special case: since X * 2.0 is converted into (fadd X, X) below,
    // an fmul by 2.0 can show up in this form.
11816     // fmul (fadd X, X), C -> fmul X, 2.0 * C
11817     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11818         N0.getOperand(0) == N0.getOperand(1)) {
11819       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11820       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11821       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11822     }
11823   }
11824 
11825   // fold (fmul X, 2.0) -> (fadd X, X)
11826   if (N1CFP && N1CFP->isExactlyValue(+2.0))
11827     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11828 
11829   // fold (fmul X, -1.0) -> (fneg X)
11830   if (N1CFP && N1CFP->isExactlyValue(-1.0))
11831     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11832       return DAG.getNode(ISD::FNEG, DL, VT, N0);
11833 
11834   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11835   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
11836                                        ForCodeSize)) {
11837     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
11838                                          ForCodeSize)) {
11839       // Both can be negated for free, check to see if at least one is cheaper
11840       // negated.
11841       if (LHSNeg == 2 || RHSNeg == 2)
11842         return DAG.getNode(ISD::FMUL, DL, VT,
11843                            GetNegatedExpression(N0, DAG, LegalOperations,
11844                                                 ForCodeSize),
11845                            GetNegatedExpression(N1, DAG, LegalOperations,
11846                                                 ForCodeSize),
11847                            Flags);
11848     }
11849   }
11850 
11851   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11852   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11853   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11854       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11855       TLI.isOperationLegal(ISD::FABS, VT)) {
11856     SDValue Select = N0, X = N1;
11857     if (Select.getOpcode() != ISD::SELECT)
11858       std::swap(Select, X);
11859 
11860     SDValue Cond = Select.getOperand(0);
11861     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11862     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11863 
11864     if (TrueOpnd && FalseOpnd &&
11865         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11866         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11867         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11868       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11869       switch (CC) {
11870       default: break;
11871       case ISD::SETOLT:
11872       case ISD::SETULT:
11873       case ISD::SETOLE:
11874       case ISD::SETULE:
11875       case ISD::SETLT:
11876       case ISD::SETLE:
11877         std::swap(TrueOpnd, FalseOpnd);
11878         LLVM_FALLTHROUGH;
11879       case ISD::SETOGT:
11880       case ISD::SETUGT:
11881       case ISD::SETOGE:
11882       case ISD::SETUGE:
11883       case ISD::SETGT:
11884       case ISD::SETGE:
11885         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11886             TLI.isOperationLegal(ISD::FNEG, VT))
11887           return DAG.getNode(ISD::FNEG, DL, VT,
11888                    DAG.getNode(ISD::FABS, DL, VT, X));
11889         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11890           return DAG.getNode(ISD::FABS, DL, VT, X);
11891 
11892         break;
11893       }
11894     }
11895   }
11896 
11897   // FMUL -> FMA combines:
11898   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11899     AddToWorklist(Fused.getNode());
11900     return Fused;
11901   }
11902 
11903   return SDValue();
11904 }
11905 
11906 SDValue DAGCombiner::visitFMA(SDNode *N) {
11907   SDValue N0 = N->getOperand(0);
11908   SDValue N1 = N->getOperand(1);
11909   SDValue N2 = N->getOperand(2);
11910   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11911   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11912   EVT VT = N->getValueType(0);
11913   SDLoc DL(N);
11914   const TargetOptions &Options = DAG.getTarget().Options;
11915 
11916   // FMA nodes have flags that propagate to the created nodes.
11917   const SDNodeFlags Flags = N->getFlags();
11918   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11919 
11920   // Constant fold FMA.
11921   if (isa<ConstantFPSDNode>(N0) &&
11922       isa<ConstantFPSDNode>(N1) &&
11923       isa<ConstantFPSDNode>(N2)) {
11924     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11925   }
11926 
11927   if (UnsafeFPMath) {
11928     if (N0CFP && N0CFP->isZero())
11929       return N2;
11930     if (N1CFP && N1CFP->isZero())
11931       return N2;
11932   }
11933   // TODO: The FMA node should have flags that propagate to these nodes.
11934   if (N0CFP && N0CFP->isExactlyValue(1.0))
11935     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11936   if (N1CFP && N1CFP->isExactlyValue(1.0))
11937     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11938 
11939   // Canonicalize (fma c, x, y) -> (fma x, c, y)
11940   if (isConstantFPBuildVectorOrConstantFP(N0) &&
11941      !isConstantFPBuildVectorOrConstantFP(N1))
11942     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11943 
11944   if (UnsafeFPMath) {
11945     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11946     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11947         isConstantFPBuildVectorOrConstantFP(N1) &&
11948         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11949       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11950                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11951                                      Flags), Flags);
11952     }
11953 
11954     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11955     if (N0.getOpcode() == ISD::FMUL &&
11956         isConstantFPBuildVectorOrConstantFP(N1) &&
11957         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11958       return DAG.getNode(ISD::FMA, DL, VT,
11959                          N0.getOperand(0),
11960                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11961                                      Flags),
11962                          N2);
11963     }
11964   }
11965 
11966   // (fma x, 1, y) -> (fadd x, y)
11967   // (fma x, -1, y) -> (fadd (fneg x), y)
11968   if (N1CFP) {
11969     if (N1CFP->isExactlyValue(1.0))
11970       // TODO: The FMA node should have flags that propagate to this node.
11971       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11972 
11973     if (N1CFP->isExactlyValue(-1.0) &&
11974         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11975       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11976       AddToWorklist(RHSNeg.getNode());
11977       // TODO: The FMA node should have flags that propagate to this node.
11978       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11979     }
11980 
    // fma (fneg x), K, y -> fma x, -K, y
11982     if (N0.getOpcode() == ISD::FNEG &&
11983         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11984          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
11985                                               ForCodeSize)))) {
11986       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11987                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11988     }
11989   }
11990 
11991   if (UnsafeFPMath) {
11992     // (fma x, c, x) -> (fmul x, (c+1))
11993     if (N1CFP && N0 == N2) {
11994       return DAG.getNode(ISD::FMUL, DL, VT, N0,
11995                          DAG.getNode(ISD::FADD, DL, VT, N1,
11996                                      DAG.getConstantFP(1.0, DL, VT), Flags),
11997                          Flags);
11998     }
11999 
12000     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12001     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12002       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12003                          DAG.getNode(ISD::FADD, DL, VT, N1,
12004                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
12005                          Flags);
12006     }
12007   }
12008 
12009   return SDValue();
12010 }
12011 
12012 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12013 // reciprocal.
12014 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12015 // Notice that this is not always beneficial. One reason is different targets
12016 // may have different costs for FDIV and FMUL, so sometimes the cost of two
12017 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12018 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12019 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12020   // TODO: Limit this transform based on optsize/minsize - it always creates at
12021   //       least 1 extra instruction. But the perf win may be substantial enough
12022   //       that only minsize should restrict this.
12023   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12024   const SDNodeFlags Flags = N->getFlags();
12025   if (!UnsafeMath && !Flags.hasAllowReciprocal())
12026     return SDValue();
12027 
12028   // Skip if current node is a reciprocal.
12029   SDValue N0 = N->getOperand(0);
12030   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12031   if (N0CFP && N0CFP->isExactlyValue(1.0))
12032     return SDValue();
12033 
12034   // Exit early if the target does not want this transform or if there can't
12035   // possibly be enough uses of the divisor to make the transform worthwhile.
12036   SDValue N1 = N->getOperand(1);
12037   unsigned MinUses = TLI.combineRepeatedFPDivisors();
12038 
12039   // For splat vectors, scale the number of uses by the splat factor. If we can
12040   // convert the division into a scalar op, that will likely be much faster.
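  // For example, a single fdiv by a <4 x float> splat divisor counts as four
  // uses here, since one scalar reciprocal could serve all four lanes.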
12041   unsigned NumElts = 1;
12042   EVT VT = N->getValueType(0);
12043   if (VT.isVector() && DAG.isSplatValue(N1))
12044     NumElts = VT.getVectorNumElements();
12045 
12046   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12047     return SDValue();
12048 
12049   // Find all FDIV users of the same divisor.
12050   // Use a set because duplicates may be present in the user list.
12051   SetVector<SDNode *> Users;
12052   for (auto *U : N1->uses()) {
12053     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12054       // This division is eligible for optimization only if global unsafe math
12055       // is enabled or if this division allows reciprocal formation.
12056       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12057         Users.insert(U);
12058     }
12059   }
12060 
12061   // Now that we have the actual number of divisor uses, make sure it meets
12062   // the minimum threshold specified by the target.
12063   if ((Users.size() * NumElts) < MinUses)
12064     return SDValue();
12065 
12066   SDLoc DL(N);
12067   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12068   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12069 
12070   // Dividend / Divisor -> Dividend * Reciprocal
12071   for (auto *U : Users) {
12072     SDValue Dividend = U->getOperand(0);
12073     if (Dividend != FPOne) {
12074       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12075                                     Reciprocal, Flags);
12076       CombineTo(U, NewNode);
12077     } else if (U != Reciprocal.getNode()) {
12078       // In the absence of fast-math-flags, this user node is always the
12079       // same node as Reciprocal, but with FMF they may be different nodes.
12080       CombineTo(U, Reciprocal);
12081     }
12082   }
12083   return SDValue(N, 0);  // N was replaced.
12084 }
12085 
12086 SDValue DAGCombiner::visitFDIV(SDNode *N) {
12087   SDValue N0 = N->getOperand(0);
12088   SDValue N1 = N->getOperand(1);
12089   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12090   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12091   EVT VT = N->getValueType(0);
12092   SDLoc DL(N);
12093   const TargetOptions &Options = DAG.getTarget().Options;
12094   SDNodeFlags Flags = N->getFlags();
12095 
12096   // fold vector ops
12097   if (VT.isVector())
12098     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12099       return FoldedVOp;
12100 
12101   // fold (fdiv c1, c2) -> c1/c2
12102   if (N0CFP && N1CFP)
12103     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12104 
12105   if (SDValue NewSel = foldBinOpIntoSelect(N))
12106     return NewSel;
12107 
12108   if (SDValue V = combineRepeatedFPDivisors(N))
12109     return V;
12110 
12111   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12112     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12113     if (N1CFP) {
12114       // Compute the reciprocal 1.0 / c2.
12115       const APFloat &N1APF = N1CFP->getValueAPF();
12116       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12117       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12118       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g. NaN, denormal, ...).
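      // For example, 1.0/2.0 == 0.5 is exact (opOK) and 1.0/3.0 is merely
      // inexact (opInexact), but 1.0/0.0 raises opDivByZero and is rejected.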
12120       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12121           (!LegalOperations ||
12122            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12123            // backend)... we should handle this gracefully after Legalize.
12124            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12125            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12126            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12127         return DAG.getNode(ISD::FMUL, DL, VT, N0,
12128                            DAG.getConstantFP(Recip, DL, VT), Flags);
12129     }
12130 
12131     // If this FDIV is part of a reciprocal square root, it may be folded
12132     // into a target-specific square root estimate instruction.
12133     if (N1.getOpcode() == ISD::FSQRT) {
12134       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
12135         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12136       }
12137     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12138                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12139       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12140                                           Flags)) {
12141         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12142         AddToWorklist(RV.getNode());
12143         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12144       }
12145     } else if (N1.getOpcode() == ISD::FP_ROUND &&
12146                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12147       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12148                                           Flags)) {
12149         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12150         AddToWorklist(RV.getNode());
12151         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12152       }
12153     } else if (N1.getOpcode() == ISD::FMUL) {
12154       // Look through an FMUL. Even though this won't remove the FDIV directly,
12155       // it's still worthwhile to get rid of the FSQRT if possible.
12156       SDValue SqrtOp;
12157       SDValue OtherOp;
12158       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12159         SqrtOp = N1.getOperand(0);
12160         OtherOp = N1.getOperand(1);
12161       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12162         SqrtOp = N1.getOperand(1);
12163         OtherOp = N1.getOperand(0);
12164       }
12165       if (SqrtOp.getNode()) {
12166         // We found a FSQRT, so try to make this fold:
12167         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12168         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12169           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12170           AddToWorklist(RV.getNode());
12171           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12172         }
12173       }
12174     }
12175 
12176     // Fold into a reciprocal estimate and multiply instead of a real divide.
12177     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12178       AddToWorklist(RV.getNode());
12179       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12180     }
12181   }
12182 
12183   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12184   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12185                                        ForCodeSize)) {
12186     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12187                                          ForCodeSize)) {
12188       // Both can be negated for free, check to see if at least one is cheaper
12189       // negated.
12190       if (LHSNeg == 2 || RHSNeg == 2)
12191         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12192                            GetNegatedExpression(N0, DAG, LegalOperations,
12193                                                 ForCodeSize),
12194                            GetNegatedExpression(N1, DAG, LegalOperations,
12195                                                 ForCodeSize),
12196                            Flags);
12197     }
12198   }
12199 
12200   return SDValue();
12201 }
12202 
12203 SDValue DAGCombiner::visitFREM(SDNode *N) {
12204   SDValue N0 = N->getOperand(0);
12205   SDValue N1 = N->getOperand(1);
12206   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12207   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12208   EVT VT = N->getValueType(0);
12209 
12210   // fold (frem c1, c2) -> fmod(c1,c2)
12211   if (N0CFP && N1CFP)
12212     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12213 
12214   if (SDValue NewSel = foldBinOpIntoSelect(N))
12215     return NewSel;
12216 
12217   return SDValue();
12218 }
12219 
12220 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12221   SDNodeFlags Flags = N->getFlags();
12222   if (!DAG.getTarget().Options.UnsafeFPMath &&
12223       !Flags.hasApproximateFuncs())
12224     return SDValue();
12225 
12226   SDValue N0 = N->getOperand(0);
12227   if (TLI.isFsqrtCheap(N0, DAG))
12228     return SDValue();
12229 
12230   // FSQRT nodes have flags that propagate to the created nodes.
12231   return buildSqrtEstimate(N0, Flags);
12232 }
12233 
12234 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12235 /// copysign(x, fp_round(y)) -> copysign(x, y)
12236 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12237   SDValue N1 = N->getOperand(1);
12238   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12239        N1.getOpcode() == ISD::FP_ROUND)) {
12240     // Do not optimize out type conversion of f128 type yet.
12241     // For some targets like x86_64, configuration is changed to keep one f128
12242     // value in one SSE register, but instruction selection cannot handle
12243     // FCOPYSIGN on SSE registers yet.
12244     EVT N1VT = N1->getValueType(0);
12245     EVT N1Op0VT = N1->getOperand(0).getValueType();
12246     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12247   }
12248   return false;
12249 }
12250 
12251 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12252   SDValue N0 = N->getOperand(0);
12253   SDValue N1 = N->getOperand(1);
12254   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12255   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12256   EVT VT = N->getValueType(0);
12257 
12258   if (N0CFP && N1CFP) // Constant fold
12259     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12260 
12261   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12262     const APFloat &V = N1C->getValueAPF();
12263     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
12264     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12265     if (!V.isNegative()) {
12266       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12267         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12268     } else {
12269       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12270         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12271                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12272     }
12273   }
12274 
12275   // copysign(fabs(x), y) -> copysign(x, y)
12276   // copysign(fneg(x), y) -> copysign(x, y)
12277   // copysign(copysign(x,z), y) -> copysign(x, y)
12278   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12279       N0.getOpcode() == ISD::FCOPYSIGN)
12280     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12281 
12282   // copysign(x, abs(y)) -> abs(x)
12283   if (N1.getOpcode() == ISD::FABS)
12284     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12285 
12286   // copysign(x, copysign(y,z)) -> copysign(x, z)
12287   if (N1.getOpcode() == ISD::FCOPYSIGN)
12288     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12289 
12290   // copysign(x, fp_extend(y)) -> copysign(x, y)
12291   // copysign(x, fp_round(y)) -> copysign(x, y)
12292   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12293     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12294 
12295   return SDValue();
12296 }
12297 
12298 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12299   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12300   if (!ExponentC)
12301     return SDValue();
12302 
12303   // Try to convert x ** (1/3) into cube root.
12304   // TODO: Handle the various flavors of long double.
12305   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12306   //       Some range near 1/3 should be fine.
12307   EVT VT = N->getValueType(0);
12308   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12309       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12310     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12311     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val), a negative number.
12313     // For regular numbers, rounding may cause the results to differ.
12314     // Therefore, we require { nsz ninf nnan afn } for this transform.
12315     // TODO: We could select out the special cases if we don't have nsz/ninf.
12316     SDNodeFlags Flags = N->getFlags();
12317     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12318         !Flags.hasApproximateFuncs())
12319       return SDValue();
12320 
12321     // Do not create a cbrt() libcall if the target does not have it, and do not
12322     // turn a pow that has lowering support into a cbrt() libcall.
12323     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12324         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12325          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12326       return SDValue();
12327 
12328     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12329   }
12330 
12331   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12332   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12333   // TODO: This could be extended (using a target hook) to handle smaller
12334   // power-of-2 fractional exponents.
12335   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12336   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12337   if (ExponentIs025 || ExponentIs075) {
12338     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12339     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12340     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12341     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12342     // For regular numbers, rounding may cause the results to differ.
12343     // Therefore, we require { nsz ninf afn } for this transform.
12344     // TODO: We could select out the special cases if we don't have nsz/ninf.
12345     SDNodeFlags Flags = N->getFlags();
12346 
12347     // We only need no signed zeros for the 0.25 case.
12348     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12349         !Flags.hasApproximateFuncs())
12350       return SDValue();
12351 
12352     // Don't double the number of libcalls. We are trying to inline fast code.
12353     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12354       return SDValue();
12355 
12356     // Assume that libcalls are the smallest code.
12357     // TODO: This restriction should probably be lifted for vectors.
12358     if (DAG.getMachineFunction().getFunction().hasOptSize())
12359       return SDValue();
12360 
12361     // pow(X, 0.25) --> sqrt(sqrt(X))
12362     SDLoc DL(N);
12363     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12364     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12365     if (ExponentIs025)
12366       return SqrtSqrt;
12367     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12368     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12369   }
12370 
12371   return SDValue();
12372 }
12373 
12374 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12375                                const TargetLowering &TLI) {
12376   // This optimization is guarded by a function attribute because it may produce
  // unexpected results. I.e., programs may be relying on the platform-specific
12378   // undefined behavior when the float-to-int conversion overflows.
12379   const Function &F = DAG.getMachineFunction().getFunction();
12380   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12381   if (StrictOverflow.getValueAsString().equals("false"))
12382     return SDValue();
12383 
12384   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12385   // replacing casts with a libcall. We also must be allowed to ignore -0.0
  // because FTRUNC will return -0.0 for inputs in (-1.0, -0.0), but using
  // integer conversions would return +0.0.
12388   // FIXME: We should be able to use node-level FMF here.
12389   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12390   EVT VT = N->getValueType(0);
12391   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12392       !DAG.getTarget().Options.NoSignedZerosFPMath)
12393     return SDValue();
12394 
12395   // fptosi/fptoui round towards zero, so converting from FP to integer and
12396   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
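  // For example, (double)(int)3.7 == 3.0 == ftrunc(3.7), and likewise
  // (double)(int)-2.5 == -2.0 == ftrunc(-2.5).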
12397   SDValue N0 = N->getOperand(0);
12398   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12399       N0.getOperand(0).getValueType() == VT)
12400     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12401 
12402   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12403       N0.getOperand(0).getValueType() == VT)
12404     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12405 
12406   return SDValue();
12407 }
12408 
12409 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12410   SDValue N0 = N->getOperand(0);
12411   EVT VT = N->getValueType(0);
12412   EVT OpVT = N0.getValueType();
12413 
12414   // fold (sint_to_fp c1) -> c1fp
12415   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12416       // ...but only if the target supports immediate floating-point values
12417       (!LegalOperations ||
12418        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12419     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12420 
12421   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12422   // but UINT_TO_FP is legal on this target, try to convert.
12423   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12424       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12425     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12426     if (DAG.SignBitIsZero(N0))
12427       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12428   }
12429 
12430   // The next optimizations are desirable only if SELECT_CC can be lowered.
12431   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
12433     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12434         !VT.isVector() &&
12435         (!LegalOperations ||
12436          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12437       SDLoc DL(N);
12438       SDValue Ops[] =
12439         { N0.getOperand(0), N0.getOperand(1),
12440           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12441           N0.getOperand(2) };
12442       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12443     }
12444 
    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0, cc)
12447     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12448         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12449         (!LegalOperations ||
12450          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12451       SDLoc DL(N);
12452       SDValue Ops[] =
12453         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12454           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12455           N0.getOperand(0).getOperand(2) };
12456       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12457     }
12458   }
12459 
12460   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12461     return FTrunc;
12462 
12463   return SDValue();
12464 }
12465 
12466 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12467   SDValue N0 = N->getOperand(0);
12468   EVT VT = N->getValueType(0);
12469   EVT OpVT = N0.getValueType();
12470 
12471   // fold (uint_to_fp c1) -> c1fp
12472   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12473       // ...but only if the target supports immediate floating-point values
12474       (!LegalOperations ||
12475        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12476     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12477 
12478   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12479   // but SINT_TO_FP is legal on this target, try to convert.
12480   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12481       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12482     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12483     if (DAG.SignBitIsZero(N0))
12484       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12485   }
12486 
12487   // The next optimizations are desirable only if SELECT_CC can be lowered.
12488   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
12490     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12491         (!LegalOperations ||
12492          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12493       SDLoc DL(N);
12494       SDValue Ops[] =
12495         { N0.getOperand(0), N0.getOperand(1),
12496           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12497           N0.getOperand(2) };
12498       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12499     }
12500   }
12501 
12502   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12503     return FTrunc;
12504 
12505   return SDValue();
12506 }
12507 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
12509 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12510   SDValue N0 = N->getOperand(0);
12511   EVT VT = N->getValueType(0);
12512 
12513   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12514     return SDValue();
12515 
12516   SDValue Src = N0.getOperand(0);
12517   EVT SrcVT = Src.getValueType();
12518   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12519   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12520 
12521   // We can safely assume the conversion won't overflow the output range,
12522   // because (for example) (uint8_t)18293.f is undefined behavior.
12523 
12524   // Since we can assume the conversion won't overflow, our decision as to
12525   // whether the input will fit in the float should depend on the minimum
12526   // of the input range and output range.
12527 
12528   // This means this is also safe for a signed input and unsigned output, since
12529   // a negative input would lead to undefined behavior.
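  // For example, i32 -> f64 -> i32 can be folded because f64 carries 53 bits
  // of precision, but i32 -> f32 -> i32 cannot, since f32 carries only 24.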
12530   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12531   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12532   unsigned ActualSize = std::min(InputSize, OutputSize);
12533   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12534 
12535   // We can only fold away the float conversion if the input range can be
12536   // represented exactly in the float range.
12537   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12538     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12539       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12540                                                        : ISD::ZERO_EXTEND;
12541       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12542     }
12543     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12544       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12545     return DAG.getBitcast(VT, Src);
12546   }
12547   return SDValue();
12548 }
12549 
12550 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12551   SDValue N0 = N->getOperand(0);
12552   EVT VT = N->getValueType(0);
12553 
12554   // fold (fp_to_sint c1fp) -> c1
12555   if (isConstantFPBuildVectorOrConstantFP(N0))
12556     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12557 
12558   return FoldIntToFPToInt(N, DAG);
12559 }
12560 
12561 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12562   SDValue N0 = N->getOperand(0);
12563   EVT VT = N->getValueType(0);
12564 
12565   // fold (fp_to_uint c1fp) -> c1
12566   if (isConstantFPBuildVectorOrConstantFP(N0))
12567     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12568 
12569   return FoldIntToFPToInt(N, DAG);
12570 }
12571 
12572 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12573   SDValue N0 = N->getOperand(0);
12574   SDValue N1 = N->getOperand(1);
12575   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12576   EVT VT = N->getValueType(0);
12577 
12578   // fold (fp_round c1fp) -> c1fp
12579   if (N0CFP)
12580     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12581 
12582   // fold (fp_round (fp_extend x)) -> x
12583   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12584     return N0.getOperand(0);
12585 
12586   // fold (fp_round (fp_round x)) -> (fp_round x)
12587   if (N0.getOpcode() == ISD::FP_ROUND) {
12588     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12589     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12590 
12591     // Skip this folding if it results in an fp_round from f80 to f16.
12592     //
12593     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12594     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12595     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12596     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12597     // x86.
12598     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12599       return SDValue();
12600 
12601     // If the first fp_round isn't a value preserving truncation, it might
12602     // introduce a tie in the second fp_round, that wouldn't occur in the
12603     // single-step fp_round we want to fold to.
12604     // In other words, double rounding isn't the same as rounding.
12605     // Also, this is a value preserving truncation iff both fp_round's are.
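    // For example, rounding f64 -> f32 -> f16 may land on an f16 tie that a
    // direct f64 -> f16 rounding would not produce, changing the result.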
12606     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12607       SDLoc DL(N);
12608       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12609                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12610     }
12611   }
12612 
12613   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12614   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12615     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12616                               N0.getOperand(0), N1);
12617     AddToWorklist(Tmp.getNode());
12618     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12619                        Tmp, N0.getOperand(1));
12620   }
12621 
12622   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12623     return NewVSel;
12624 
12625   return SDValue();
12626 }
12627 
12628 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12629   SDValue N0 = N->getOperand(0);
12630   EVT VT = N->getValueType(0);
12631   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12632   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12633 
12634   // fold (fp_round_inreg c1fp) -> c1fp
12635   if (N0CFP && isTypeLegal(EVT)) {
12636     SDLoc DL(N);
12637     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12638     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12639   }
12640 
12641   return SDValue();
12642 }
12643 
12644 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12645   SDValue N0 = N->getOperand(0);
12646   EVT VT = N->getValueType(0);
12647 
  // If this is fp_round(fp_extend), don't fold it; allow ourselves to be
  // folded instead.
12649   if (N->hasOneUse() &&
12650       N->use_begin()->getOpcode() == ISD::FP_ROUND)
12651     return SDValue();
12652 
12653   // fold (fp_extend c1fp) -> c1fp
12654   if (isConstantFPBuildVectorOrConstantFP(N0))
12655     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12656 
12657   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12658   if (N0.getOpcode() == ISD::FP16_TO_FP &&
12659       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12660     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12661 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
12664   if (N0.getOpcode() == ISD::FP_ROUND
12665       && N0.getConstantOperandVal(1) == 1) {
12666     SDValue In = N0.getOperand(0);
12667     if (In.getValueType() == VT) return In;
12668     if (VT.bitsLT(In.getValueType()))
12669       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12670                          In, N0.getOperand(1));
12671     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12672   }
12673 
12674   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12675   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12676        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12677     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12678     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12679                                      LN0->getChain(),
12680                                      LN0->getBasePtr(), N0.getValueType(),
12681                                      LN0->getMemOperand());
12682     CombineTo(N, ExtLoad);
12683     CombineTo(N0.getNode(),
12684               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12685                           N0.getValueType(), ExtLoad,
12686                           DAG.getIntPtrConstant(1, SDLoc(N0))),
12687               ExtLoad.getValue(1));
12688     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12689   }
12690 
12691   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12692     return NewVSel;
12693 
12694   return SDValue();
12695 }
12696 
12697 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12698   SDValue N0 = N->getOperand(0);
12699   EVT VT = N->getValueType(0);
12700 
12701   // fold (fceil c1) -> fceil(c1)
12702   if (isConstantFPBuildVectorOrConstantFP(N0))
12703     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12704 
12705   return SDValue();
12706 }
12707 
12708 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12709   SDValue N0 = N->getOperand(0);
12710   EVT VT = N->getValueType(0);
12711 
12712   // fold (ftrunc c1) -> ftrunc(c1)
12713   if (isConstantFPBuildVectorOrConstantFP(N0))
12714     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12715 
12716   // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
  // likely to be generated when extracting an integer from an already-rounded
  // floating-point value.
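  // For example, ftrunc (ffloor x) folds to (ffloor x), since ffloor already
  // produces an integer-valued float that ftrunc leaves unchanged.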
12719   switch (N0.getOpcode()) {
12720   default: break;
12721   case ISD::FRINT:
12722   case ISD::FTRUNC:
12723   case ISD::FNEARBYINT:
12724   case ISD::FFLOOR:
12725   case ISD::FCEIL:
12726     return N0;
12727   }
12728 
12729   return SDValue();
12730 }
12731 
12732 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12733   SDValue N0 = N->getOperand(0);
12734   EVT VT = N->getValueType(0);
12735 
12736   // fold (ffloor c1) -> ffloor(c1)
12737   if (isConstantFPBuildVectorOrConstantFP(N0))
12738     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12739 
12740   return SDValue();
12741 }
12742 
12743 // FIXME: FNEG and FABS have a lot in common; refactor.
12744 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12745   SDValue N0 = N->getOperand(0);
12746   EVT VT = N->getValueType(0);
12747 
12748   // Constant fold FNEG.
12749   if (isConstantFPBuildVectorOrConstantFP(N0))
12750     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12751 
12752   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12753                          &DAG.getTarget().Options, ForCodeSize))
12754     return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12755 
12756   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12757   // constant pool values.
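  // For example, for f32 the sign mask is 0x80000000; xor-ing it into 1.5
  // (0x3FC00000) yields -1.5 (0xBFC00000) with a plain integer op.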
12758   if (!TLI.isFNegFree(VT) &&
12759       N0.getOpcode() == ISD::BITCAST &&
12760       N0.getNode()->hasOneUse()) {
12761     SDValue Int = N0.getOperand(0);
12762     EVT IntVT = Int.getValueType();
12763     if (IntVT.isInteger() && !IntVT.isVector()) {
12764       APInt SignMask;
12765       if (N0.getValueType().isVector()) {
12766         // For a vector, get a mask such as 0x80... per scalar element
12767         // and splat it.
12768         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12769         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12770       } else {
12771         // For a scalar, just generate 0x80...
12772         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12773       }
12774       SDLoc DL0(N0);
12775       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12776                         DAG.getConstant(SignMask, DL0, IntVT));
12777       AddToWorklist(Int.getNode());
12778       return DAG.getBitcast(VT, Int);
12779     }
12780   }
12781 
12782   // (fneg (fmul c, x)) -> (fmul -c, x)
12783   if (N0.getOpcode() == ISD::FMUL &&
12784       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12785     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12786     if (CFP1) {
12787       APFloat CVal = CFP1->getValueAPF();
12788       CVal.changeSign();
12789       if (Level >= AfterLegalizeDAG &&
12790           (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
12791            TLI.isOperationLegal(ISD::ConstantFP, VT)))
12792         return DAG.getNode(
12793             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12794             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12795             N0->getFlags());
12796     }
12797   }
12798 
12799   return SDValue();
12800 }
12801 
12802 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12803                             APFloat (*Op)(const APFloat &, const APFloat &)) {
12804   SDValue N0 = N->getOperand(0);
12805   SDValue N1 = N->getOperand(1);
12806   EVT VT = N->getValueType(0);
12807   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12808   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12809 
12810   if (N0CFP && N1CFP) {
12811     const APFloat &C0 = N0CFP->getValueAPF();
12812     const APFloat &C1 = N1CFP->getValueAPF();
12813     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12814   }
12815 
12816   // Canonicalize to constant on RHS.
12817   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12818       !isConstantFPBuildVectorOrConstantFP(N1))
12819     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12820 
12821   return SDValue();
12822 }
12823 
12824 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12825   return visitFMinMax(DAG, N, minnum);
12826 }
12827 
12828 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12829   return visitFMinMax(DAG, N, maxnum);
12830 }
12831 
12832 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12833   return visitFMinMax(DAG, N, minimum);
12834 }
12835 
12836 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12837   return visitFMinMax(DAG, N, maximum);
12838 }
12839 
12840 SDValue DAGCombiner::visitFABS(SDNode *N) {
12841   SDValue N0 = N->getOperand(0);
12842   EVT VT = N->getValueType(0);
12843 
12844   // fold (fabs c1) -> fabs(c1)
12845   if (isConstantFPBuildVectorOrConstantFP(N0))
12846     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12847 
12848   // fold (fabs (fabs x)) -> (fabs x)
12849   if (N0.getOpcode() == ISD::FABS)
12850     return N->getOperand(0);
12851 
12852   // fold (fabs (fneg x)) -> (fabs x)
12853   // fold (fabs (fcopysign x, y)) -> (fabs x)
12854   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12855     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12856 
12857   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
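  // E.g. (illustrative, for f32, whose sign mask is 0x80000000):
  //   (fabs (bitcast i32 x to f32)) -> (bitcast (and x, 0x7fffffff) to f32)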
12858   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12859     SDValue Int = N0.getOperand(0);
12860     EVT IntVT = Int.getValueType();
12861     if (IntVT.isInteger() && !IntVT.isVector()) {
12862       APInt SignMask;
12863       if (N0.getValueType().isVector()) {
12864         // For a vector, get a mask such as 0x7f... per scalar element
12865         // and splat it.
12866         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12867         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12868       } else {
12869         // For a scalar, just generate 0x7f...
12870         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12871       }
12872       SDLoc DL(N0);
12873       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12874                         DAG.getConstant(SignMask, DL, IntVT));
12875       AddToWorklist(Int.getNode());
12876       return DAG.getBitcast(N->getValueType(0), Int);
12877     }
12878   }
12879 
12880   return SDValue();
12881 }
12882 
12883 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12884   SDValue Chain = N->getOperand(0);
12885   SDValue N1 = N->getOperand(1);
12886   SDValue N2 = N->getOperand(2);
12887 
  // If N is a constant, we could fold this into a fallthrough or an
  // unconditional branch. However, that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.
12891   // If we did this folding here, it would be necessary to update the
12892   // MachineBasicBlock CFG, which is awkward.
12893 
12894   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12895   // on the target.
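  // E.g. (illustrative; the leading chain operand is omitted):
  //   brcond (setcc x, y, cc), dest -> br_cc cc, x, y, dest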
12896   if (N1.getOpcode() == ISD::SETCC &&
12897       TLI.isOperationLegalOrCustom(ISD::BR_CC,
12898                                    N1.getOperand(0).getValueType())) {
12899     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12900                        Chain, N1.getOperand(2),
12901                        N1.getOperand(0), N1.getOperand(1), N2);
12902   }
12903 
12904   if (N1.hasOneUse()) {
12905     if (SDValue NewN1 = rebuildSetCC(N1))
12906       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12907   }
12908 
12909   return SDValue();
12910 }
12911 
12912 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12913   if (N.getOpcode() == ISD::SRL ||
12914       (N.getOpcode() == ISD::TRUNCATE &&
12915        (N.getOperand(0).hasOneUse() &&
12916         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
12918     if (N.getOpcode() == ISD::TRUNCATE)
12919       N = N.getOperand(0);
12920 
12921     // Match this pattern so that we can generate simpler code:
12922     //
12923     //   %a = ...
12924     //   %b = and i32 %a, 2
12925     //   %c = srl i32 %b, 1
12926     //   brcond i32 %c ...
12927     //
12928     // into
12929     //
12930     //   %a = ...
12931     //   %b = and i32 %a, 2
12932     //   %c = setcc eq %b, 0
12933     //   brcond %c ...
12934     //
12935     // This applies only when the AND constant value has one bit set and the
12936     // SRL constant is equal to the log2 of the AND constant. The back-end is
12937     // smart enough to convert the result into a TEST/JMP sequence.
12938     SDValue Op0 = N.getOperand(0);
12939     SDValue Op1 = N.getOperand(1);
12940 
12941     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12942       SDValue AndOp1 = Op0.getOperand(1);
12943 
12944       if (AndOp1.getOpcode() == ISD::Constant) {
12945         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12946 
12947         if (AndConst.isPowerOf2() &&
12948             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12949           SDLoc DL(N);
12950           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12951                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12952                               ISD::SETNE);
12953         }
12954       }
12955     }
12956   }
12957 
  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x, y), 1)) -> br(x == y)
12960   if (N.getOpcode() == ISD::XOR) {
12961     // Because we may call this on a speculatively constructed
12962     // SimplifiedSetCC Node, we need to simplify this node first.
12963     // Ideally this should be folded into SimplifySetCC and not
12964     // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
12966     HandleSDNode XORHandle(N);
12967     while (N.getOpcode() == ISD::XOR) {
12968       SDValue Tmp = visitXOR(N.getNode());
12969       // No simplification done.
12970       if (!Tmp.getNode())
12971         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
12974       if (Tmp.getNode() == N.getNode())
12975         N = XORHandle.getValue();
12976       else // Node simplified. Try simplifying again.
12977         N = Tmp;
12978     }
12979 
12980     if (N.getOpcode() != ISD::XOR)
12981       return N;
12982 
12983     SDNode *TheXor = N.getNode();
12984 
12985     SDValue Op0 = TheXor->getOperand(0);
12986     SDValue Op1 = TheXor->getOperand(1);
12987 
12988     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12989       bool Equal = false;
12990       if (isOneConstant(Op0) && Op0.hasOneUse() &&
12991           Op0.getOpcode() == ISD::XOR) {
12992         TheXor = Op0.getNode();
12993         Equal = true;
12994       }
12995 
12996       EVT SetCCVT = N.getValueType();
12997       if (LegalTypes)
12998         SetCCVT = getSetCCResultType(SetCCVT);
12999       // Replace the uses of XOR with SETCC
13000       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13001                           Equal ? ISD::SETEQ : ISD::SETNE);
13002     }
13003   }
13004 
13005   return SDValue();
13006 }
13007 
13008 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13009 //
13010 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13011   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13012   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13013 
  // If N is a constant, we could fold this into a fallthrough or an
  // unconditional branch. However, that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.
13017   // If we did this folding here, it would be necessary to update the
13018   // MachineBasicBlock CFG, which is awkward.
13019 
  // Use SimplifySetCC to simplify SETCCs.
13021   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13022                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13023                                false);
13024   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13025 
13026   // fold to a simpler setcc
13027   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13028     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13029                        N->getOperand(0), Simp.getOperand(2),
13030                        Simp.getOperand(0), Simp.getOperand(1),
13031                        N->getOperand(4));
13032 
13033   return SDValue();
13034 }
13035 
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and N may be folded into the load / store addressing mode.
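/// E.g., if N is (add base, 8) and Use is a load with base pointer N, this
/// asks the target whether [reg + 8] is a legal addressing mode for the
/// loaded type.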
13038 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13039                                     SelectionDAG &DAG,
13040                                     const TargetLowering &TLI) {
13041   EVT VT;
13042   unsigned AS;
13043 
13044   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
13045     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13046       return false;
13047     VT = LD->getMemoryVT();
13048     AS = LD->getAddressSpace();
13049   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
13050     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13051       return false;
13052     VT = ST->getMemoryVT();
13053     AS = ST->getAddressSpace();
13054   } else
13055     return false;
13056 
13057   TargetLowering::AddrMode AM;
13058   if (N->getOpcode() == ISD::ADD) {
13059     AM.HasBaseReg = true;
13060     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13061     if (Offset)
13062       // [reg +/- imm]
13063       AM.BaseOffs = Offset->getSExtValue();
13064     else
13065       // [reg +/- reg]
13066       AM.Scale = 1;
13067   } else if (N->getOpcode() == ISD::SUB) {
13068     AM.HasBaseReg = true;
13069     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13070     if (Offset)
13071       // [reg +/- imm]
13072       AM.BaseOffs = -Offset->getSExtValue();
13073     else
13074       // [reg +/- reg]
13075       AM.Scale = 1;
13076   } else
13077     return false;
13078 
13079   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13080                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13081 }
13082 
13083 /// Try turning a load/store into a pre-indexed load/store when the base
13084 /// pointer is an add or subtract and it has other uses besides the load/store.
13085 /// After the transformation, the new indexed load/store has effectively folded
13086 /// the add/subtract in and all of its other uses are redirected to the
13087 /// new load/store.
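/// For example (an illustrative sketch, target permitting):
///   ptr2 = add ptr, 8
///   val  = load ptr2
///   ...other uses of ptr2...
/// becomes
///   val, ptr2' = pre_inc_load ptr, 8
/// with the other uses of ptr2 redirected to ptr2'.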
13088 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13089   if (Level < AfterLegalizeDAG)
13090     return false;
13091 
13092   bool isLoad = true;
13093   SDValue Ptr;
13094   EVT VT;
13095   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13096     if (LD->isIndexed())
13097       return false;
13098     VT = LD->getMemoryVT();
13099     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13100         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13101       return false;
13102     Ptr = LD->getBasePtr();
13103   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13104     if (ST->isIndexed())
13105       return false;
13106     VT = ST->getMemoryVT();
13107     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13108         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13109       return false;
13110     Ptr = ST->getBasePtr();
13111     isLoad = false;
13112   } else {
13113     return false;
13114   }
13115 
13116   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13117   // out.  There is no reason to make this a preinc/predec.
13118   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13119       Ptr.getNode()->hasOneUse())
13120     return false;
13121 
13122   // Ask the target to do addressing mode selection.
13123   SDValue BasePtr;
13124   SDValue Offset;
13125   ISD::MemIndexedMode AM = ISD::UNINDEXED;
13126   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13127     return false;
13128 
13129   // Backends without true r+i pre-indexed forms may need to pass a
13130   // constant base with a variable offset so that constant coercion
13131   // will work with the patterns in canonical form.
13132   bool Swapped = false;
13133   if (isa<ConstantSDNode>(BasePtr)) {
13134     std::swap(BasePtr, Offset);
13135     Swapped = true;
13136   }
13137 
  // Don't create an indexed load / store with a zero offset.
13139   if (isNullConstant(Offset))
13140     return false;
13141 
13142   // Try turning it into a pre-indexed load / store except when:
13143   // 1) The new base ptr is a frame index.
  // 2) N is a store and the new base ptr is either the same as, or a
  //    predecessor of, the value being stored.
13146   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13147   //    that would create a cycle.
13148   // 4) All uses are load / store ops that use it as old base ptr.
13149 
13150   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
13151   // (plus the implicit offset) to a register to preinc anyway.
13152   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13153     return false;
13154 
13155   // Check #2.
13156   if (!isLoad) {
13157     SDValue Val = cast<StoreSDNode>(N)->getValue();
13158 
13159     // Would require a copy.
13160     if (Val == BasePtr)
13161       return false;
13162 
13163     // Would create a cycle.
13164     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13165       return false;
13166   }
13167 
13168   // Caches for hasPredecessorHelper.
13169   SmallPtrSet<const SDNode *, 32> Visited;
13170   SmallVector<const SDNode *, 16> Worklist;
13171   Worklist.push_back(N);
13172 
13173   // If the offset is a constant, there may be other adds of constants that
13174   // can be folded with this one. We should do this to avoid having to keep
13175   // a copy of the original base pointer.
13176   SmallVector<SDNode *, 16> OtherUses;
13177   if (isa<ConstantSDNode>(Offset))
13178     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13179                               UE = BasePtr.getNode()->use_end();
13180          UI != UE; ++UI) {
13181       SDUse &Use = UI.getUse();
13182       // Skip the use that is Ptr and uses of other results from BasePtr's
13183       // node (important for nodes that return multiple results).
13184       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13185         continue;
13186 
13187       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13188         continue;
13189 
13190       if (Use.getUser()->getOpcode() != ISD::ADD &&
13191           Use.getUser()->getOpcode() != ISD::SUB) {
13192         OtherUses.clear();
13193         break;
13194       }
13195 
13196       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13197       if (!isa<ConstantSDNode>(Op1)) {
13198         OtherUses.clear();
13199         break;
13200       }
13201 
13202       // FIXME: In some cases, we can be smarter about this.
13203       if (Op1.getValueType() != Offset.getValueType()) {
13204         OtherUses.clear();
13205         break;
13206       }
13207 
13208       OtherUses.push_back(Use.getUser());
13209     }
13210 
13211   if (Swapped)
13212     std::swap(BasePtr, Offset);
13213 
13214   // Now check for #3 and #4.
13215   bool RealUse = false;
13216 
13217   for (SDNode *Use : Ptr.getNode()->uses()) {
13218     if (Use == N)
13219       continue;
13220     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13221       return false;
13222 
    // If Ptr can be folded into the addressing mode of every other use, it's
    // not profitable to do this transformation; only a use where the fold is
    // not possible counts as a real use.
13225     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13226       RealUse = true;
13227   }
13228 
13229   if (!RealUse)
13230     return false;
13231 
13232   SDValue Result;
13233   if (isLoad)
13234     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13235                                 BasePtr, Offset, AM);
13236   else
13237     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13238                                  BasePtr, Offset, AM);
13239   ++PreIndexedNodes;
13240   ++NodesCombined;
13241   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13242              Result.getNode()->dump(&DAG); dbgs() << '\n');
13243   WorklistRemover DeadNodes(*this);
13244   if (isLoad) {
13245     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13246     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13247   } else {
13248     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13249   }
13250 
13251   // Finally, since the node is now dead, remove it from the graph.
13252   deleteAndRecombine(N);
13253 
13254   if (Swapped)
13255     std::swap(BasePtr, Offset);
13256 
13257   // Replace other uses of BasePtr that can be updated to use Ptr
13258   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
13259     unsigned OffsetIdx = 1;
13260     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13261       OffsetIdx = 0;
13262     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13263            BasePtr.getNode() && "Expected BasePtr operand");
13264 
13265     // We need to replace ptr0 in the following expression:
13266     //   x0 * offset0 + y0 * ptr0 = t0
13267     // knowing that
13268     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13269     //
13270     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13271     // indexed load/store and the expression that needs to be re-written.
13272     //
13273     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
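    //
    // E.g. (illustrative), in the all-add case (x0 = x1 = y0 = y1 = 1):
    //   t1 = ptr0 + offset1, t0 = ptr0 + offset0
    //   => t0 = (offset0 - offset1) + t1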
13275 
13276     ConstantSDNode *CN =
13277       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13278     int X0, X1, Y0, Y1;
13279     const APInt &Offset0 = CN->getAPIntValue();
13280     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13281 
13282     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13283     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13284     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13285     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13286 
13287     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13288 
13289     APInt CNV = Offset0;
13290     if (X0 < 0) CNV = -CNV;
13291     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13292     else CNV = CNV - Offset1;
13293 
13294     SDLoc DL(OtherUses[i]);
13295 
13296     // We can now generate the new expression.
13297     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13298     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13299 
13300     SDValue NewUse = DAG.getNode(Opcode,
13301                                  DL,
13302                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13303     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13304     deleteAndRecombine(OtherUses[i]);
13305   }
13306 
13307   // Replace the uses of Ptr with uses of the updated base value.
13308   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13309   deleteAndRecombine(Ptr.getNode());
13310   AddToWorklist(Result.getNode());
13311 
13312   return true;
13313 }
13314 
/// Try to combine a load/store with an add/sub of the base pointer node into
/// a post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the add/sub
/// are redirected to the new load/store.
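/// For example (an illustrative sketch, target permitting):
///   val  = load ptr
///   ptr2 = add ptr, 4
/// becomes
///   val, ptr2' = post_inc_load ptr, 4
/// with the uses of ptr2 redirected to ptr2'.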
13319 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13320   if (Level < AfterLegalizeDAG)
13321     return false;
13322 
13323   bool isLoad = true;
13324   SDValue Ptr;
13325   EVT VT;
13326   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13327     if (LD->isIndexed())
13328       return false;
13329     VT = LD->getMemoryVT();
13330     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13331         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13332       return false;
13333     Ptr = LD->getBasePtr();
13334   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13335     if (ST->isIndexed())
13336       return false;
13337     VT = ST->getMemoryVT();
13338     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13339         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13340       return false;
13341     Ptr = ST->getBasePtr();
13342     isLoad = false;
13343   } else {
13344     return false;
13345   }
13346 
13347   if (Ptr.getNode()->hasOneUse())
13348     return false;
13349 
13350   for (SDNode *Op : Ptr.getNode()->uses()) {
13351     if (Op == N ||
13352         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13353       continue;
13354 
13355     SDValue BasePtr;
13356     SDValue Offset;
13357     ISD::MemIndexedMode AM = ISD::UNINDEXED;
13358     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with a zero offset.
13360       if (isNullConstant(Offset))
13361         continue;
13362 
13363       // Try turning it into a post-indexed load / store except when
13364       // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded into the addressing mode).
13366       // 2) Op must be independent of N, i.e. Op is neither a predecessor
13367       //    nor a successor of N. Otherwise, if Op is folded that would
13368       //    create a cycle.
13369 
13370       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13371         continue;
13372 
13373       // Check for #1.
13374       bool TryNext = false;
13375       for (SDNode *Use : BasePtr.getNode()->uses()) {
13376         if (Use == Ptr.getNode())
13377           continue;
13378 
13379         // If all the uses are load / store addresses, then don't do the
13380         // transformation.
13381         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13382           bool RealUse = false;
13383           for (SDNode *UseUse : Use->uses()) {
13384             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13385               RealUse = true;
13386           }
13387 
13388           if (!RealUse) {
13389             TryNext = true;
13390             break;
13391           }
13392         }
13393       }
13394 
13395       if (TryNext)
13396         continue;
13397 
13398       // Check for #2.
13399       SmallPtrSet<const SDNode *, 32> Visited;
13400       SmallVector<const SDNode *, 8> Worklist;
      // Ptr is a predecessor of both N and Op.
13402       Visited.insert(Ptr.getNode());
13403       Worklist.push_back(N);
13404       Worklist.push_back(Op);
13405       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13406           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13407         SDValue Result = isLoad
13408           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13409                                BasePtr, Offset, AM)
13410           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13411                                 BasePtr, Offset, AM);
13412         ++PostIndexedNodes;
13413         ++NodesCombined;
13414         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13415                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13416                    dbgs() << '\n');
13417         WorklistRemover DeadNodes(*this);
13418         if (isLoad) {
13419           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13420           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13421         } else {
13422           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13423         }
13424 
13425         // Finally, since the node is now dead, remove it from the graph.
13426         deleteAndRecombine(N);
13427 
13428         // Replace the uses of Use with uses of the updated base value.
13429         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13430                                       Result.getValue(isLoad ? 1 : 0));
13431         deleteAndRecombine(Op);
13432         return true;
13433       }
13434     }
13435   }
13436 
13437   return false;
13438 }
13439 
13440 /// Return the base-pointer arithmetic from an indexed \p LD.
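/// E.g., a PRE_INC/POST_INC load with increment 4 yields (add BP, 4), while
/// the *_DEC forms yield (sub BP, 4).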
13441 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13442   ISD::MemIndexedMode AM = LD->getAddressingMode();
13443   assert(AM != ISD::UNINDEXED);
13444   SDValue BP = LD->getOperand(1);
13445   SDValue Inc = LD->getOperand(2);
13446 
13447   // Some backends use TargetConstants for load offsets, but don't expect
13448   // TargetConstants in general ADD nodes. We can convert these constants into
13449   // regular Constants (if the constant is not opaque).
13450   assert((Inc.getOpcode() != ISD::TargetConstant ||
13451           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13452          "Cannot split out indexing using opaque target constants");
13453   if (Inc.getOpcode() == ISD::TargetConstant) {
13454     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13455     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13456                           ConstInc->getValueType(0));
13457   }
13458 
13459   unsigned Opc =
13460       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13461   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13462 }
13463 
13464 static inline int numVectorEltsOrZero(EVT T) {
13465   return T.isVector() ? T.getVectorNumElements() : 0;
13466 }
13467 
13468 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13469   Val = ST->getValue();
13470   EVT STType = Val.getValueType();
13471   EVT STMemType = ST->getMemoryVT();
13472   if (STType == STMemType)
13473     return true;
13474   if (isTypeLegal(STMemType))
13475     return false; // fail.
13476   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13477       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13478     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13479     return true;
13480   }
13481   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13482       STType.isInteger() && STMemType.isInteger()) {
13483     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13484     return true;
13485   }
13486   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13487     Val = DAG.getBitcast(STMemType, Val);
13488     return true;
13489   }
13490   return false; // fail.
13491 }
13492 
13493 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13494   EVT LDMemType = LD->getMemoryVT();
13495   EVT LDType = LD->getValueType(0);
13496   assert(Val.getValueType() == LDMemType &&
13497          "Attempting to extend value of non-matching type");
13498   if (LDType == LDMemType)
13499     return true;
13500   if (LDMemType.isInteger() && LDType.isInteger()) {
13501     switch (LD->getExtensionType()) {
13502     case ISD::NON_EXTLOAD:
13503       Val = DAG.getBitcast(LDType, Val);
13504       return true;
13505     case ISD::EXTLOAD:
13506       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13507       return true;
13508     case ISD::SEXTLOAD:
13509       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13510       return true;
13511     case ISD::ZEXTLOAD:
13512       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13513       return true;
13514     }
13515   }
13516   return false;
13517 }
13518 
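// Forward a stored value to a load from the same location, e.g.
// (an illustrative sketch):
//   store i32 %v, %p
//   %x = load i32, %p   ; chained directly on the store
// can replace %x with %v, possibly masked / truncated / extended below when
// the memory and value types differ.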
13519 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13520   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13521     return SDValue();
13522   SDValue Chain = LD->getOperand(0);
13523   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13524   if (!ST || ST->isVolatile())
13525     return SDValue();
13526 
13527   EVT LDType = LD->getValueType(0);
13528   EVT LDMemType = LD->getMemoryVT();
13529   EVT STMemType = ST->getMemoryVT();
13530   EVT STType = ST->getValue().getValueType();
13531 
13532   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13533   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13534   int64_t Offset;
13535   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13536     return SDValue();
13537 
  // Normalize for endianness. After this, Offset == 0 will denote that the
  // least significant bit in the loaded value maps to the least significant
  // bit in the stored value. With Offset == n (for n > 0) the loaded value
  // starts at the n-th least significant byte of the stored value.
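  // E.g. (illustrative): for a 4-byte store and a 1-byte load on a
  // big-endian target, an address delta of 3 normalizes to
  // Offset = (32 - 8) / 8 - 3 = 0, i.e. the load reads the least
  // significant byte of the stored value.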
13542   if (DAG.getDataLayout().isBigEndian())
13543     Offset = (STMemType.getStoreSizeInBits() -
13544               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13545 
  // Check that the stored value covers all bits that are loaded.
13547   bool STCoversLD =
13548       (Offset >= 0) &&
13549       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13550 
13551   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13552     if (LD->isIndexed()) {
13553       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13554                     LD->getAddressingMode() == ISD::POST_DEC);
13555       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
      SDValue Idx =
          DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
                      LD->getOperand(1), LD->getOperand(2));
13558       SDValue Ops[] = {Val, Idx, Chain};
13559       return CombineTo(LD, Ops, 3);
13560     }
13561     return CombineTo(LD, Val, Chain);
13562   };
13563 
13564   if (!STCoversLD)
13565     return SDValue();
13566 
13567   // Memory as copy space (potentially masked).
13568   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13569     // Simple case: Direct non-truncating forwarding
13570     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13571       return ReplaceLd(LD, ST->getValue(), Chain);
13572     // Can we model the truncate and extension with an and mask?
13573     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13574         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13575       // Mask to size of LDMemType
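      // E.g. (illustrative): an i32 value stored through an i16 memory slot
      // and zext-loaded back as i32 is masked with 0x0000ffff.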
13576       auto Mask =
13577           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13578                                                STMemType.getSizeInBits()),
13579                           SDLoc(ST), STType);
13580       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13581       return ReplaceLd(LD, Val, Chain);
13582     }
13583   }
13584 
13585   // TODO: Deal with nonzero offset.
13586   if (LD->getBasePtr().isUndef() || Offset != 0)
13587     return SDValue();
  // Model the necessary truncations / extensions.
  SDValue Val;
  // Truncate the value to the stored memory size.
13591   do {
13592     if (!getTruncatedStoreValue(ST, Val))
13593       continue;
13594     if (!isTypeLegal(LDMemType))
13595       continue;
13596     if (STMemType != LDMemType) {
13597       // TODO: Support vectors? This requires extract_subvector/bitcast.
13598       if (!STMemType.isVector() && !LDMemType.isVector() &&
13599           STMemType.isInteger() && LDMemType.isInteger())
13600         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13601       else
13602         continue;
13603     }
13604     if (!extendLoadedValueToExtension(LD, Val))
13605       continue;
13606     return ReplaceLd(LD, Val, Chain);
13607   } while (false);
13608 
13609   // On failure, cleanup dead nodes we may have created.
13610   if (Val->use_empty())
13611     deleteAndRecombine(Val.getNode());
13612   return SDValue();
13613 }
13614 
13615 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13616   LoadSDNode *LD  = cast<LoadSDNode>(N);
13617   SDValue Chain = LD->getChain();
13618   SDValue Ptr   = LD->getBasePtr();
13619 
13620   // If load is not volatile and there are no uses of the loaded value (and
13621   // the updated indexed value in case of indexed loads), change uses of the
13622   // chain value into uses of the chain input (i.e. delete the dead load).
13623   if (!LD->isVolatile()) {
13624     if (N->getValueType(1) == MVT::Other) {
13625       // Unindexed loads.
13626       if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two-value CombineTo variant here, e.g.:
13628         // v1, chain2 = load chain1, loc
13629         // v2, chain3 = load chain2, loc
13630         // v3         = add v2, c
13631         // Now we replace use of chain2 with chain1.  This makes the second load
13632         // isomorphic to the one we are deleting, and thus makes this load live.
13633         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13634                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13635                    dbgs() << "\n");
13636         WorklistRemover DeadNodes(*this);
13637         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13638         AddUsersToWorklist(Chain.getNode());
13639         if (N->use_empty())
13640           deleteAndRecombine(N);
13641 
13642         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13643       }
13644     } else {
13645       // Indexed loads.
13646       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13647 
13648       // If this load has an opaque TargetConstant offset, then we cannot split
13649       // the indexing into an add/sub directly (that TargetConstant may not be
13650       // valid for a different type of node, and we cannot convert an opaque
13651       // target constant into a regular constant).
13652       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13653                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13654 
13655       if (!N->hasAnyUseOfValue(0) &&
13656           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13657         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13658         SDValue Index;
13659         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13660           Index = SplitIndexingFromLoad(LD);
13661           // Try to fold the base pointer arithmetic into subsequent loads and
13662           // stores.
13663           AddUsersToWorklist(N);
13664         } else
13665           Index = DAG.getUNDEF(N->getValueType(1));
13666         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13667                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13668                    dbgs() << " and 2 other values\n");
13669         WorklistRemover DeadNodes(*this);
13670         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13671         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13672         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13673         deleteAndRecombine(N);
13674         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13675       }
13676     }
13677   }
13678 
13679   // If this load is directly stored, replace the load value with the stored
13680   // value.
13681   if (auto V = ForwardStoreValueToDirectLoad(LD))
13682     return V;
13683 
13684   // Try to infer better alignment information than the load already has.
13685   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13686     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13687       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13688         SDValue NewLoad = DAG.getExtLoad(
13689             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13690             LD->getPointerInfo(), LD->getMemoryVT(), Align,
13691             LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N as we are only refining the alignment.
13693         assert(NewLoad.getNode() == N);
13694         (void)NewLoad;
13695       }
13696     }
13697   }
13698 
13699   if (LD->isUnindexed()) {
13700     // Walk up chain skipping non-aliasing memory nodes.
13701     SDValue BetterChain = FindBetterChain(LD, Chain);
13702 
13703     // If there is a better chain.
13704     if (Chain != BetterChain) {
13705       SDValue ReplLoad;
13706 
      // Replace the chain to avoid the dependency.
13708       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13709         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13710                                BetterChain, Ptr, LD->getMemOperand());
13711       } else {
13712         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13713                                   LD->getValueType(0),
13714                                   BetterChain, Ptr, LD->getMemoryVT(),
13715                                   LD->getMemOperand());
13716       }
13717 
13718       // Create token factor to keep old chain connected.
13719       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13720                                   MVT::Other, Chain, ReplLoad.getValue(1));
13721 
13722       // Replace uses with load result and token factor
13723       return CombineTo(N, ReplLoad.getValue(0), Token);
13724     }
13725   }
13726 
13727   // Try transforming N to an indexed load.
13728   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13729     return SDValue(N, 0);
13730 
13731   // Try to slice up N to more direct loads if the slices are mapped to
13732   // different register banks or pairing can take place.
13733   if (SliceUpLoad(N))
13734     return SDValue(N, 0);
13735 
13736   return SDValue();
13737 }
13738 
13739 namespace {
13740 
13741 /// Helper structure used to slice a load in smaller loads.
13742 /// Basically a slice is obtained from the following sequence:
13743 /// Origin = load Ty1, Base
13744 /// Shift = srl Ty1 Origin, CstTy Amount
13745 /// Inst = trunc Shift to Ty2
13746 ///
13747 /// Then, it will be rewritten into:
13748 /// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
13750 ///
13751 /// SliceTy is deduced from the number of bits that are actually used to
13752 /// build Inst.
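///
/// For example (illustrative, little-endian):
///   Origin = load i64, Base
///   Shift  = srl i64 Origin, 32
///   Inst   = trunc i64 Shift to i32
/// is rewritten into:
///   Slice  = load i32, Base + 4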
13753 struct LoadedSlice {
13754   /// Helper structure used to compute the cost of a slice.
13755   struct Cost {
13756     /// Are we optimizing for code size.
13757     bool ForCodeSize;
13758 
    /// Various costs.
13760     unsigned Loads = 0;
13761     unsigned Truncates = 0;
13762     unsigned CrossRegisterBanksCopies = 0;
13763     unsigned ZExts = 0;
13764     unsigned Shift = 0;
13765 
13766     Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13767 
13768     /// Get the cost of one isolated slice.
13769     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13770         : ForCodeSize(ForCodeSize), Loads(1) {
13771       EVT TruncType = LS.Inst->getValueType(0);
13772       EVT LoadedType = LS.getLoadedType();
13773       if (TruncType != LoadedType &&
13774           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13775         ZExts = 1;
13776     }
13777 
13778     /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method grows the cost of the original
    /// load by the gain from this slice.
13782     void addSliceGain(const LoadedSlice &LS) {
13783       // Each slice saves a truncate.
13784       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13785       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13786                               LS.Inst->getValueType(0)))
13787         ++Truncates;
13788       // If there is a shift amount, this slice gets rid of it.
13789       if (LS.Shift)
13790         ++Shift;
13791       // If this slice can merge a cross register bank copy, account for it.
13792       if (LS.canMergeExpensiveCrossRegisterBankCopy())
13793         ++CrossRegisterBanksCopies;
13794     }
13795 
13796     Cost &operator+=(const Cost &RHS) {
13797       Loads += RHS.Loads;
13798       Truncates += RHS.Truncates;
13799       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13800       ZExts += RHS.ZExts;
13801       Shift += RHS.Shift;
13802       return *this;
13803     }
13804 
13805     bool operator==(const Cost &RHS) const {
13806       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13807              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13808              ZExts == RHS.ZExts && Shift == RHS.Shift;
13809     }
13810 
13811     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13812 
13813     bool operator<(const Cost &RHS) const {
13814       // Assume cross register banks copies are as expensive as loads.
13815       // FIXME: Do we want some more target hooks?
13816       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13817       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13818       // Unless we are optimizing for code size, consider the
13819       // expensive operation first.
13820       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13821         return ExpensiveOpsLHS < ExpensiveOpsRHS;
13822       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13823              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13824     }
13825 
13826     bool operator>(const Cost &RHS) const { return RHS < *this; }
13827 
13828     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13829 
13830     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13831   };
13832 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
13835   SDNode *Inst;
13836 
13837   // The original load instruction.
13838   LoadSDNode *Origin;
13839 
13840   // The right shift amount in bits from the original load.
13841   unsigned Shift;
13842 
  // The DAG from which Origin came.
13844   // This is used to get some contextual information about legal types, etc.
13845   SelectionDAG *DAG;
13846 
13847   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13848               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13849       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13850 
  /// Get the bits used by this slice within the original loaded value.
  /// \return Result is as wide as the original load, with the used bits
  ///         set to 1 and the unused bits set to 0.
13854   APInt getUsedBits() const {
13855     // Reproduce the trunc(lshr) sequence:
13856     // - Start from the truncated value.
13857     // - Zero extend to the desired bit width.
13858     // - Shift left.
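    // E.g., an i8 slice of an i32 load with Shift == 8 uses bits 0x0000ff00.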
13859     assert(Origin && "No original load to compare against.");
13860     unsigned BitWidth = Origin->getValueSizeInBits(0);
13861     assert(Inst && "This slice is not bound to an instruction");
13862     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13863            "Extracted slice is bigger than the whole type!");
13864     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13865     UsedBits.setAllBits();
13866     UsedBits = UsedBits.zext(BitWidth);
13867     UsedBits <<= Shift;
13868     return UsedBits;
13869   }
13870 
13871   /// Get the size of the slice to be loaded in bytes.
13872   unsigned getLoadedSize() const {
13873     unsigned SliceSize = getUsedBits().countPopulation();
13874     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13875     return SliceSize / 8;
13876   }
13877 
13878   /// Get the type that will be loaded for this slice.
13879   /// Note: This may not be the final type for the slice.
13880   EVT getLoadedType() const {
13881     assert(DAG && "Missing context");
13882     LLVMContext &Ctxt = *DAG->getContext();
13883     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13884   }
13885 
13886   /// Get the alignment of the load used for this slice.
13887   unsigned getAlignment() const {
13888     unsigned Alignment = Origin->getAlignment();
13889     unsigned Offset = getOffsetFromBase();
13890     if (Offset != 0)
13891       Alignment = MinAlign(Alignment, Alignment + Offset);
13892     return Alignment;
13893   }
13894 
13895   /// Check if this slice can be rewritten with legal operations.
13896   bool isLegal() const {
13897     // An invalid slice is not legal.
13898     if (!Origin || !Inst || !DAG)
13899       return false;
13900 
    // Offsets are for indexed loads only; we do not handle that.
13902     if (!Origin->getOffset().isUndef())
13903       return false;
13904 
13905     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13906 
13907     // Check that the type is legal.
13908     EVT SliceType = getLoadedType();
13909     if (!TLI.isTypeLegal(SliceType))
13910       return false;
13911 
13912     // Check that the load is legal for this type.
13913     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13914       return false;
13915 
13916     // Check that the offset can be computed.
13917     // 1. Check its type.
13918     EVT PtrType = Origin->getBasePtr().getValueType();
13919     if (PtrType == MVT::Untyped || PtrType.isExtended())
13920       return false;
13921 
13922     // 2. Check that it fits in the immediate.
13923     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13924       return false;
13925 
13926     // 3. Check that the computation is legal.
13927     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13928       return false;
13929 
13930     // Check that the zext is legal if it needs one.
13931     EVT TruncateType = Inst->getValueType(0);
13932     if (TruncateType != SliceType &&
13933         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13934       return false;
13935 
13936     return true;
13937   }
13938 
13939   /// Get the offset in bytes of this slice in the original chunk of
13940   /// bits.
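  /// E.g., a 1-byte slice of an i32 load with Shift == 8 is at offset 1 on
  /// a little-endian target and at 4 - 1 - 1 == 2 on a big-endian one.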
13941   /// \pre DAG != nullptr.
13942   uint64_t getOffsetFromBase() const {
13943     assert(DAG && "Missing context.");
13944     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13945     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13946     uint64_t Offset = Shift / 8;
13947     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13948     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13949            "The size of the original loaded type is not a multiple of a"
13950            " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized away earlier in the process.
13953     assert(TySizeInBytes > Offset &&
13954            "Invalid shift amount for given loaded size");
13955     if (IsBigEndian)
13956       Offset = TySizeInBytes - Offset - getLoadedSize();
13957     return Offset;
13958   }
13959 
13960   /// Generate the sequence of instructions to load the slice
13961   /// represented by this object and redirect the uses of this slice to
13962   /// this new sequence of instructions.
13963   /// \pre this->Inst && this->Origin are valid Instructions and this
13964   /// object passed the legal check: LoadedSlice::isLegal returned true.
13965   /// \return The last instruction of the sequence used to load the slice.
13966   SDValue loadSlice() const {
13967     assert(Inst && Origin && "Unable to replace a non-existing slice.");
13968     const SDValue &OldBaseAddr = Origin->getBasePtr();
13969     SDValue BaseAddr = OldBaseAddr;
13970     // Get the offset in that chunk of bytes w.r.t. the endianness.
13971     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13972     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13973     if (Offset) {
13974       // BaseAddr = BaseAddr + Offset.
13975       EVT ArithType = BaseAddr.getValueType();
13976       SDLoc DL(Origin);
13977       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13978                               DAG->getConstant(Offset, DL, ArithType));
13979     }
13980 
13981     // Create the type of the loaded slice according to its size.
13982     EVT SliceType = getLoadedType();
13983 
13984     // Create the load for the slice.
13985     SDValue LastInst =
13986         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13987                      Origin->getPointerInfo().getWithOffset(Offset),
13988                      getAlignment(), Origin->getMemOperand()->getFlags());
13989     // If the final type is not the same as the loaded type, this means that
13990     // we have to pad with zero. Create a zero extend for that.
13991     EVT FinalType = Inst->getValueType(0);
13992     if (SliceType != FinalType)
13993       LastInst =
13994           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13995     return LastInst;
13996   }
13997 
13998   /// Check if this slice can be merged with an expensive cross register
13999   /// bank copy. E.g.,
14000   /// i = load i32
14001   /// f = bitcast i32 i to float
14002   bool canMergeExpensiveCrossRegisterBankCopy() const {
14003     if (!Inst || !Inst->hasOneUse())
14004       return false;
14005     SDNode *Use = *Inst->use_begin();
14006     if (Use->getOpcode() != ISD::BITCAST)
14007       return false;
14008     assert(DAG && "Missing context");
14009     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14010     EVT ResVT = Use->getValueType(0);
14011     const TargetRegisterClass *ResRC =
14012         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14013     const TargetRegisterClass *ArgRC =
14014         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14015                            Use->getOperand(0)->isDivergent());
14016     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14017       return false;
14018 
14019     // At this point, we know that we perform a cross-register-bank copy.
14020     // Check if it is expensive.
14021     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless the two register classes do not
    // explicitly share a common subclass.
14024     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14025       return false;
14026 
14027     // Check if it will be merged with the load.
14028     // 1. Check the alignment constraint.
14029     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14030         ResVT.getTypeForEVT(*DAG->getContext()));
14031 
14032     if (RequiredAlignment > getAlignment())
14033       return false;
14034 
14035     // 2. Check that the load is a legal operation for that type.
14036     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14037       return false;
14038 
14039     // 3. Check that we do not have a zext in the way.
14040     if (Inst->getValueType(0) != getLoadedType())
14041       return false;
14042 
14043     return true;
14044   }
14045 };
14046 
14047 } // end anonymous namespace
14048 
14049 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14050 /// \p UsedBits looks like 0..0 1..1 0..0.
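/// E.g., 0b00111100 is dense, while 0b01011000 is not.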
14051 static bool areUsedBitsDense(const APInt &UsedBits) {
14052   // If all the bits are one, this is dense!
14053   if (UsedBits.isAllOnesValue())
14054     return true;
14055 
14056   // Get rid of the unused bits on the right.
14057   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14058   // Get rid of the unused bits on the left.
14059   if (NarrowedUsedBits.countLeadingZeros())
14060     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14061   // Check that the chunk of bits is completely used.
14062   return NarrowedUsedBits.isAllOnesValue();
14063 }
14064 
14065 /// Check whether or not \p First and \p Second are next to each other
14066 /// in memory. This means that there is no hole between the bits loaded
14067 /// by \p First and the bits loaded by \p Second.
14068 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14069                                      const LoadedSlice &Second) {
14070   assert(First.Origin == Second.Origin && First.Origin &&
14071          "Unable to match different memory origins.");
14072   APInt UsedBits = First.getUsedBits();
14073   assert((UsedBits & Second.getUsedBits()) == 0 &&
14074          "Slices are not supposed to overlap.");
14075   UsedBits |= Second.getUsedBits();
14076   return areUsedBitsDense(UsedBits);
14077 }
14078 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
14081 /// \pre \p GlobalLSCost should account for at least as many loads as
14082 /// there is in the slices in \p LoadedSlices.
14083 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14084                                  LoadedSlice::Cost &GlobalLSCost) {
14085   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
14087   if (NumberOfSlices < 2)
14088     return;
14089 
14090   // Sort the slices so that elements that are likely to be next to each
14091   // other in memory are next to each other in the list.
14092   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14093     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14094     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14095   });
14096   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
14099   const LoadedSlice *First = nullptr;
14100   const LoadedSlice *Second = nullptr;
14101   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14102                 // Set the beginning of the pair.
14103                                                            First = Second) {
14104     Second = &LoadedSlices[CurrSlice];
14105 
14106     // If First is NULL, it means we start a new pair.
14107     // Get to the next slice.
14108     if (!First)
14109       continue;
14110 
14111     EVT LoadedType = First->getLoadedType();
14112 
14113     // If the types of the slices are different, we cannot pair them.
14114     if (LoadedType != Second->getLoadedType())
14115       continue;
14116 
14117     // Check if the target supplies paired loads for this type.
14118     unsigned RequiredAlignment = 0;
14119     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
14121       Second = nullptr;
14122       continue;
14123     }
14124     // Check if we meet the alignment requirement.
14125     if (RequiredAlignment > First->getAlignment())
14126       continue;
14127 
14128     // Check that both loads are next to each other in memory.
14129     if (!areSlicesNextToEachOther(*First, *Second))
14130       continue;
14131 
14132     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14133     --GlobalLSCost.Loads;
14134     // Move to the next pair.
14135     Second = nullptr;
14136   }
14137 }
14138 
/// Check the profitability of all involved LoadedSlices.
/// Currently, it is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
14142 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14143 ///
14144 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14145 /// the elements themselves.
14146 ///
/// FIXME: When the cost model is mature enough, we can relax
/// constraints (1) and (2).
14149 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14150                                 const APInt &UsedBits, bool ForCodeSize) {
14151   unsigned NumberOfSlices = LoadedSlices.size();
14152   if (StressLoadSlicing)
14153     return NumberOfSlices > 1;
14154 
14155   // Check (1).
14156   if (NumberOfSlices != 2)
14157     return false;
14158 
14159   // Check (2).
14160   if (!areUsedBitsDense(UsedBits))
14161     return false;
14162 
14163   // Check (3).
14164   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14165   // The original code has one big load.
14166   OrigCost.Loads = 1;
14167   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14168     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14169     // Accumulate the cost of all the slices.
14170     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14171     GlobalSlicingCost += SliceCost;
14172 
    // In the original configuration, account as a cost the gain obtained
    // with the current slice.
14175     OrigCost.addSliceGain(LS);
14176   }
14177 
14178   // If the target supports paired load, adjust the cost accordingly.
14179   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14180   return OrigCost > GlobalSlicingCost;
14181 }
14182 
14183 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14184 /// operations, split it in the various pieces being extracted.
14185 ///
14186 /// This sort of thing is introduced by SROA.
14187 /// This slicing takes care not to insert overlapping loads.
14188 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
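///
/// An illustrative (little-endian) sketch of the pattern this handles:
///   %ld = load i32, %p
///   %lo = trunc %ld to i16
///   %hi = trunc (lshr %ld, 16) to i16
/// may be rewritten as two independent i16 loads, one from %p and one
/// from %p plus 2 bytes.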
14189 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14190   if (Level < AfterLegalizeDAG)
14191     return false;
14192 
14193   LoadSDNode *LD = cast<LoadSDNode>(N);
14194   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14195       !LD->getValueType(0).isInteger())
14196     return false;
14197 
14198   // Keep track of already used bits to detect overlapping values.
14199   // In that case, we will just abort the transformation.
14200   APInt UsedBits(LD->getValueSizeInBits(0), 0);
14201 
14202   SmallVector<LoadedSlice, 4> LoadedSlices;
14203 
14204   // Check if this load is used as several smaller chunks of bits.
14205   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14206   // of computation for each trunc.
14207   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14208        UI != UIEnd; ++UI) {
14209     // Skip the uses of the chain.
14210     if (UI.getUse().getResNo() != 0)
14211       continue;
14212 
14213     SDNode *User = *UI;
14214     unsigned Shift = 0;
14215 
14216     // Check if this is a trunc(lshr).
14217     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14218         isa<ConstantSDNode>(User->getOperand(1))) {
14219       Shift = User->getConstantOperandVal(1);
14220       User = *User->use_begin();
14221     }
14222 
14223     // At this point, User is a truncate iff we encountered trunc or
14224     // trunc(lshr).
14225     if (User->getOpcode() != ISD::TRUNCATE)
14226       return false;
14227 
14228     // The width of the type must be a power of 2 and at least 8 bits.
14229     // Otherwise the load cannot be represented in LLVM IR.
14230     // Moreover, if the shift amount is not a multiple of 8, the slice
14231     // straddles byte boundaries. We do not support that.
14232     unsigned Width = User->getValueSizeInBits(0);
14233     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14234       return false;
14235 
14236     // Build the slice for this chain of computations.
14237     LoadedSlice LS(User, LD, Shift, &DAG);
14238     APInt CurrentUsedBits = LS.getUsedBits();
14239 
14240     // Check if this slice overlaps with another.
14241     if ((CurrentUsedBits & UsedBits) != 0)
14242       return false;
14243     // Update the bits used globally.
14244     UsedBits |= CurrentUsedBits;
14245 
14246     // Check if the new slice would be legal.
14247     if (!LS.isLegal())
14248       return false;
14249 
14250     // Record the slice.
14251     LoadedSlices.push_back(LS);
14252   }
14253 
14254   // Abort slicing if it does not seem to be profitable.
14255   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14256     return false;
14257 
14258   ++SlicedLoads;
14259 
14260   // Rewrite each chain to use an independent load.
14261   // By construction, each chain can be represented by a unique load.
14262 
14263   // Prepare the argument for the new token factor for all the slices.
14264   SmallVector<SDValue, 8> ArgChains;
14265   for (SmallVectorImpl<LoadedSlice>::const_iterator
14266            LSIt = LoadedSlices.begin(),
14267            LSItEnd = LoadedSlices.end();
14268        LSIt != LSItEnd; ++LSIt) {
14269     SDValue SliceInst = LSIt->loadSlice();
14270     CombineTo(LSIt->Inst, SliceInst, true);
14271     if (SliceInst.getOpcode() != ISD::LOAD)
14272       SliceInst = SliceInst.getOperand(0);
14273     assert(SliceInst->getOpcode() == ISD::LOAD &&
14274            "It takes more than a zext to get to the loaded slice!!");
14275     ArgChains.push_back(SliceInst.getValue(1));
14276   }
14277 
14278   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14279                               ArgChains);
14280   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14281   AddToWorklist(Chain.getNode());
14282   return true;
14283 }
14284 
14285 /// Check to see if V is (and load (ptr), imm), where the load is having
14286 /// specific bytes cleared out.  If so, return the byte size being masked out
14287 /// and the shift amount.
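///
/// For instance (illustrative, i32): V = (and (load Ptr), 0xFFFF00FF)
/// clears exactly byte 1, so this returns {1, 1}: one byte masked out,
/// one byte of shift. A {0, 0} result means the pattern did not match.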
14288 static std::pair<unsigned, unsigned>
14289 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14290   std::pair<unsigned, unsigned> Result(0, 0);
14291 
14292   // Check for the structure we're looking for.
14293   if (V->getOpcode() != ISD::AND ||
14294       !isa<ConstantSDNode>(V->getOperand(1)) ||
14295       !ISD::isNormalLoad(V->getOperand(0).getNode()))
14296     return Result;
14297 
14298   // Check the chain and pointer.
14299   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14300   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
14301 
14302   // This only handles simple types.
14303   if (V.getValueType() != MVT::i16 &&
14304       V.getValueType() != MVT::i32 &&
14305       V.getValueType() != MVT::i64)
14306     return Result;
14307 
14308   // Check the constant mask.  Invert it so that the bits being masked out are
14309   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
14310   // follow the sign bit for uniformity.
14311   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14312   unsigned NotMaskLZ = countLeadingZeros(NotMask);
14313   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
14314   unsigned NotMaskTZ = countTrailingZeros(NotMask);
14315   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
14316   if (NotMaskLZ == 64) return Result;  // All zero mask.
14317 
14318   // See if we have a contiguous run of bits.  If so, we have 0*1+0*
14319   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14320     return Result;
14321 
14322   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14323   if (V.getValueType() != MVT::i64 && NotMaskLZ)
14324     NotMaskLZ -= 64-V.getValueSizeInBits();
14325 
14326   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14327   switch (MaskedBytes) {
14328   case 1:
14329   case 2:
14330   case 4: break;
14331   default: return Result; // All one mask, or 5-byte mask.
14332   }
14333 
14334   // Verify that the masked region starts at a byte offset that is a multiple
14335   // of the mask width, so the narrowed access is aligned the same as its width.
14336   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14337 
14338   // For narrowing to be valid, the load must be the memory operation
14339   // immediately preceding the store.
14340   if (LD == Chain.getNode())
14341     ; // ok.
14342   else if (Chain->getOpcode() == ISD::TokenFactor &&
14343            SDValue(LD, 1).hasOneUse()) {
14344     // LD has only one chain use, so there are no indirect dependencies.
14345     bool isOk = false;
14346     for (const SDValue &ChainOp : Chain->op_values())
14347       if (ChainOp.getNode() == LD) {
14348         isOk = true;
14349         break;
14350       }
14351     if (!isOk)
14352       return Result;
14353   } else
14354     return Result; // Fail.
14355 
14356   Result.first = MaskedBytes;
14357   Result.second = NotMaskTZ/8;
14358   return Result;
14359 }
14360 
14361 /// Check to see if IVal is something that provides a value as specified by
14362 /// MaskInfo. If so, replace the specified store with a narrower store of
14363 /// truncated IVal.
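///
/// E.g. (an illustrative little-endian case): with MaskInfo = {1, 1},
/// storing (or (and (load p), 0xFFFF00FF), IVal) back to p becomes a
/// single i8 store of (trunc (srl IVal, 8)) at p + 1, provided IVal is
/// known to be zero outside byte 1.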
14364 static SDNode *
14365 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14366                                 SDValue IVal, StoreSDNode *St,
14367                                 DAGCombiner *DC) {
14368   unsigned NumBytes = MaskInfo.first;
14369   unsigned ByteShift = MaskInfo.second;
14370   SelectionDAG &DAG = DC->getDAG();
14371 
14372   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14373   // that uses this.  If not, this is not a replacement.
14374   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14375                                   ByteShift*8, (ByteShift+NumBytes)*8);
14376   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14377 
14378   // Check that it is legal on the target to do this.  It is legal if the new
14379   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14380   // legalization.
14381   MVT VT = MVT::getIntegerVT(NumBytes*8);
14382   if (!DC->isTypeLegal(VT))
14383     return nullptr;
14384 
14385   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14386   // shifted by ByteShift and truncated down to NumBytes.
14387   if (ByteShift) {
14388     SDLoc DL(IVal);
14389     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14390                        DAG.getConstant(ByteShift*8, DL,
14391                                     DC->getShiftAmountTy(IVal.getValueType())));
14392   }
14393 
14394   // Figure out the offset for the store and the alignment of the access.
14395   unsigned StOffset;
14396   unsigned NewAlign = St->getAlignment();
14397 
14398   if (DAG.getDataLayout().isLittleEndian())
14399     StOffset = ByteShift;
14400   else
14401     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14402 
14403   SDValue Ptr = St->getBasePtr();
14404   if (StOffset) {
14405     SDLoc DL(IVal);
14406     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14407                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14408     NewAlign = MinAlign(NewAlign, StOffset);
14409   }
14410 
14411   // Truncate down to the new size.
14412   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14413 
14414   ++OpsNarrowed;
14415   return DAG
14416       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14417                 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14418       .getNode();
14419 }
14420 
14421 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14422 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14423 /// narrowing the load and store if it would end up being a win for performance
14424 /// or code size.
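///
/// An illustrative (little-endian) case:
///   (store (or (load p), 0x00FF0000), p)   ; i32
/// can be narrowed to an i8 load / or / store touching only the byte at
/// p + 2, leaving the other three bytes untouched.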
14425 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14426   StoreSDNode *ST  = cast<StoreSDNode>(N);
14427   if (ST->isVolatile())
14428     return SDValue();
14429 
14430   SDValue Chain = ST->getChain();
14431   SDValue Value = ST->getValue();
14432   SDValue Ptr   = ST->getBasePtr();
14433   EVT VT = Value.getValueType();
14434 
14435   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14436     return SDValue();
14437 
14438   unsigned Opc = Value.getOpcode();
14439 
14440   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14441   // is a byte mask indicating a consecutive number of bytes, check to see if
14442   // Y is known to provide just those bytes.  If so, we try to replace the
14443   // load / or / store sequence with a single (narrower) store, which makes
14444   // the load dead.
14445   if (Opc == ISD::OR) {
14446     std::pair<unsigned, unsigned> MaskedLoad;
14447     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14448     if (MaskedLoad.first)
14449       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14450                                                   Value.getOperand(1), ST,this))
14451         return SDValue(NewST, 0);
14452 
14453     // Or is commutative, so try swapping X and Y.
14454     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14455     if (MaskedLoad.first)
14456       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14457                                                   Value.getOperand(0), ST,this))
14458         return SDValue(NewST, 0);
14459   }
14460 
14461   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14462       Value.getOperand(1).getOpcode() != ISD::Constant)
14463     return SDValue();
14464 
14465   SDValue N0 = Value.getOperand(0);
14466   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14467       Chain == SDValue(N0.getNode(), 1)) {
14468     LoadSDNode *LD = cast<LoadSDNode>(N0);
14469     if (LD->getBasePtr() != Ptr ||
14470         LD->getPointerInfo().getAddrSpace() !=
14471         ST->getPointerInfo().getAddrSpace())
14472       return SDValue();
14473 
14474     // Find the type to narrow the load / op / store to.
14475     SDValue N1 = Value.getOperand(1);
14476     unsigned BitWidth = N1.getValueSizeInBits();
14477     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14478     if (Opc == ISD::AND)
14479       Imm ^= APInt::getAllOnesValue(BitWidth);
14480     if (Imm == 0 || Imm.isAllOnesValue())
14481       return SDValue();
14482     unsigned ShAmt = Imm.countTrailingZeros();
14483     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14484     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14485     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14486     // The narrowing should be profitable, the load/store operation should be
14487     // legal (or custom) and the store size should be equal to the NewVT width.
14488     while (NewBW < BitWidth &&
14489            (NewVT.getStoreSizeInBits() != NewBW ||
14490             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14491             !TLI.isNarrowingProfitable(VT, NewVT))) {
14492       NewBW = NextPowerOf2(NewBW);
14493       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14494     }
14495     if (NewBW >= BitWidth)
14496       return SDValue();
14497 
14498     // If the lowest changed bit does not start at a NewBW boundary,
14499     // start at the previous boundary.
14500     if (ShAmt % NewBW)
14501       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14502     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14503                                    std::min(BitWidth, ShAmt + NewBW));
14504     if ((Imm & Mask) == Imm) {
14505       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14506       if (Opc == ISD::AND)
14507         NewImm ^= APInt::getAllOnesValue(NewBW);
14508       uint64_t PtrOff = ShAmt / 8;
14509       // For big endian targets, we need to adjust the offset to the pointer to
14510       // load the correct bytes.
14511       if (DAG.getDataLayout().isBigEndian())
14512         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14513 
14514       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14515       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14516       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14517         return SDValue();
14518 
14519       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14520                                    Ptr.getValueType(), Ptr,
14521                                    DAG.getConstant(PtrOff, SDLoc(LD),
14522                                                    Ptr.getValueType()));
14523       SDValue NewLD =
14524           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14525                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14526                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14527       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14528                                    DAG.getConstant(NewImm, SDLoc(Value),
14529                                                    NewVT));
14530       SDValue NewST =
14531           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14532                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14533 
14534       AddToWorklist(NewPtr.getNode());
14535       AddToWorklist(NewLD.getNode());
14536       AddToWorklist(NewVal.getNode());
14537       WorklistRemover DeadNodes(*this);
14538       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14539       ++OpsNarrowed;
14540       return NewST;
14541     }
14542   }
14543 
14544   return SDValue();
14545 }
14546 
14547 /// For a given floating point load / store pair, if the load value isn't used
14548 /// by any other operations, then consider transforming the pair to integer
14549 /// load / store operations if the target deems the transformation profitable.
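///
/// E.g. (illustrative, subject to the target hooks queried below):
///   (store (load f32 %p), %q)  -->  (store (load i32 %p), %q)
/// so that copying four bytes of memory does not occupy an FP register.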
14550 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14551   StoreSDNode *ST  = cast<StoreSDNode>(N);
14552   SDValue Chain = ST->getChain();
14553   SDValue Value = ST->getValue();
14554   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14555       Value.hasOneUse() &&
14556       Chain == SDValue(Value.getNode(), 1)) {
14557     LoadSDNode *LD = cast<LoadSDNode>(Value);
14558     EVT VT = LD->getMemoryVT();
14559     if (!VT.isFloatingPoint() ||
14560         VT != ST->getMemoryVT() ||
14561         LD->isNonTemporal() ||
14562         ST->isNonTemporal() ||
14563         LD->getPointerInfo().getAddrSpace() != 0 ||
14564         ST->getPointerInfo().getAddrSpace() != 0)
14565       return SDValue();
14566 
14567     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14568     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14569         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14570         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14571         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14572       return SDValue();
14573 
14574     unsigned LDAlign = LD->getAlignment();
14575     unsigned STAlign = ST->getAlignment();
14576     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14577     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14578     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14579       return SDValue();
14580 
14581     SDValue NewLD =
14582         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14583                     LD->getPointerInfo(), LDAlign);
14584 
14585     SDValue NewST =
14586         DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14587                      ST->getPointerInfo(), STAlign);
14588 
14589     AddToWorklist(NewLD.getNode());
14590     AddToWorklist(NewST.getNode());
14591     WorklistRemover DeadNodes(*this);
14592     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14593     ++LdStFP2Int;
14594     return NewST;
14595   }
14596 
14597   return SDValue();
14598 }
14599 
14600 // This is a helper function for visitMUL to check the profitability
14601 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14602 // MulNode is the original multiply, AddNode is (add x, c1),
14603 // and ConstNode is c2.
14604 //
14605 // If the (add x, c1) has multiple uses, we could increase
14606 // the number of adds if we make this transformation.
14607 // It would only be worth doing this if we can remove a
14608 // multiply in the process. Check for that here.
14609 // To illustrate:
14610 //     (A + c1) * c3
14611 //     (A + c2) * c3
14612 // We're checking for cases where we have common "c3 * A" expressions.
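//
// A worked instance (illustrative): with c1 = 10, c2 = 20, and c3 = 5,
//   (A + 10) * 5  ->  (A * 5) + 50
//   (A + 20) * 5  ->  (A * 5) + 100
// Both results share the single multiply (A * 5), which pays for the
// extra adds.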
14613 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14614                                               SDValue &AddNode,
14615                                               SDValue &ConstNode) {
14616   APInt Val;
14617 
14618   // If the add only has one use, this would be OK to do.
14619   if (AddNode.getNode()->hasOneUse())
14620     return true;
14621 
14622   // Walk all the users of the constant with which we're multiplying.
14623   for (SDNode *Use : ConstNode->uses()) {
14624     if (Use == MulNode) // This use is the one we're on right now. Skip it.
14625       continue;
14626 
14627     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14628       SDNode *OtherOp;
14629       SDNode *MulVar = AddNode.getOperand(0).getNode();
14630 
14631       // OtherOp is what we're multiplying against the constant.
14632       if (Use->getOperand(0) == ConstNode)
14633         OtherOp = Use->getOperand(1).getNode();
14634       else
14635         OtherOp = Use->getOperand(0).getNode();
14636 
14637       // Check to see if multiply is with the same operand of our "add".
14638       //
14639       //     ConstNode  = CONST
14640       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
14641       //     ...
14642       //     AddNode  = (A + c1)  <-- MulVar is A.
14643       //         = AddNode * ConstNode   <-- current visiting instruction.
14644       //
14645       // If we make this transformation, we will have a common
14646       // multiply (ConstNode * A) that we can save.
14647       if (OtherOp == MulVar)
14648         return true;
14649 
14650       // Now check to see if a future expansion will give us a common
14651       // multiply.
14652       //
14653       //     ConstNode  = CONST
14654       //     AddNode    = (A + c1)
14655       //     ...   = AddNode * ConstNode <-- current visiting instruction.
14656       //     ...
14657       //     OtherOp = (A + c2)
14658       //     Use     = OtherOp * ConstNode <-- visiting Use.
14659       //
14660       // If we make this transformation, we will have a common
14661       // multiply (CONST * A) after we also do the same transformation
14662       // to the "Use" instruction.
14663       if (OtherOp->getOpcode() == ISD::ADD &&
14664           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14665           OtherOp->getOperand(0).getNode() == MulVar)
14666         return true;
14667     }
14668   }
14669 
14670   // Didn't find a case where this would be profitable.
14671   return false;
14672 }
14673 
14674 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14675                                          unsigned NumStores) {
14676   SmallVector<SDValue, 8> Chains;
14677   SmallPtrSet<const SDNode *, 8> Visited;
14678   SDLoc StoreDL(StoreNodes[0].MemNode);
14679 
14680   for (unsigned i = 0; i < NumStores; ++i) {
14681     Visited.insert(StoreNodes[i].MemNode);
14682   }
14683 
14684   // Don't include chain nodes that are candidate stores themselves or repeats.
14685   for (unsigned i = 0; i < NumStores; ++i) {
14686     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
14687       Chains.push_back(StoreNodes[i].MemNode->getChain());
14688   }
14689 
14690   assert(Chains.size() > 0 && "Chain should have generated a chain");
14691   return DAG.getTokenFactor(StoreDL, Chains);
14692 }
14693 
14694 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14695     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14696     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14697   // Make sure we have something to merge.
14698   if (NumStores < 2)
14699     return false;
14700 
14701   // Take the debug location from the first store in the chain.
14702   SDLoc DL(StoreNodes[0].MemNode);
14703 
14704   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14705   unsigned SizeInBits = NumStores * ElementSizeBits;
14706   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14707 
14708   EVT StoreTy;
14709   if (UseVector) {
14710     unsigned Elts = NumStores * NumMemElts;
14711     // Get the type for the merged vector store.
14712     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14713   } else
14714     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14715 
14716   SDValue StoredVal;
14717   if (UseVector) {
14718     if (IsConstantSrc) {
14719       SmallVector<SDValue, 8> BuildVector;
14720       for (unsigned I = 0; I != NumStores; ++I) {
14721         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14722         SDValue Val = St->getValue();
14723         // If constant is of the wrong type, convert it now.
14724         if (MemVT != Val.getValueType()) {
14725           Val = peekThroughBitcasts(Val);
14726           // Deal with constants of wrong size.
14727           if (ElementSizeBits != Val.getValueSizeInBits()) {
14728             EVT IntMemVT =
14729                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14730             if (isa<ConstantFPSDNode>(Val)) {
14731               // Not clear how to truncate FP values.
14732               return false;
14733             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14734               Val = DAG.getConstant(C->getAPIntValue()
14735                                         .zextOrTrunc(Val.getValueSizeInBits())
14736                                         .zextOrTrunc(ElementSizeBits),
14737                                     SDLoc(C), IntMemVT);
14738           }
14739           // Make sure the correctly sized value also has the correct type.
14740           Val = DAG.getBitcast(MemVT, Val);
14741         }
14742         BuildVector.push_back(Val);
14743       }
14744       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14745                                                : ISD::BUILD_VECTOR,
14746                               DL, StoreTy, BuildVector);
14747     } else {
14748       SmallVector<SDValue, 8> Ops;
14749       for (unsigned i = 0; i < NumStores; ++i) {
14750         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14751         SDValue Val = peekThroughBitcasts(St->getValue());
14752         // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
14753         // type MemVT. If the underlying value is not the correct
14754         // type, but it is an extraction of an appropriate vector we
14755         // can recast Val to be of the correct type. This may require
14756         // converting between EXTRACT_VECTOR_ELT and
14757         // EXTRACT_SUBVECTOR.
14758         if ((MemVT != Val.getValueType()) &&
14759             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14760              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14761           EVT MemVTScalarTy = MemVT.getScalarType();
14762           // We may need to add a bitcast here to get types to line up.
14763           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14764             Val = DAG.getBitcast(MemVT, Val);
14765           } else {
14766             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14767                                             : ISD::EXTRACT_VECTOR_ELT;
14768             SDValue Vec = Val.getOperand(0);
14769             SDValue Idx = Val.getOperand(1);
14770             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14771           }
14772         }
14773         Ops.push_back(Val);
14774       }
14775 
14776       // Build the extracted vector elements back into a vector.
14777       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14778                                                : ISD::BUILD_VECTOR,
14779                               DL, StoreTy, Ops);
14780     }
14781   } else {
14782     // We should always use a vector store when merging extracted vector
14783     // elements, so this path implies a store of constants.
14784     assert(IsConstantSrc && "Merged vector elements should use vector store");
14785 
14786     APInt StoreInt(SizeInBits, 0);
14787 
14788     // Construct a single integer constant which is made of the smaller
14789     // constant inputs.
14790     bool IsLE = DAG.getDataLayout().isLittleEndian();
14791     for (unsigned i = 0; i < NumStores; ++i) {
14792       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14793       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14794 
14795       SDValue Val = St->getValue();
14796       Val = peekThroughBitcasts(Val);
14797       StoreInt <<= ElementSizeBits;
14798       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14799         StoreInt |= C->getAPIntValue()
14800                         .zextOrTrunc(ElementSizeBits)
14801                         .zextOrTrunc(SizeInBits);
14802       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14803         StoreInt |= C->getValueAPF()
14804                         .bitcastToAPInt()
14805                         .zextOrTrunc(ElementSizeBits)
14806                         .zextOrTrunc(SizeInBits);
14807         // If FP truncation is necessary, give up for now.
14808         if (MemVT.getSizeInBits() != ElementSizeBits)
14809           return false;
14810       } else {
14811         llvm_unreachable("Invalid constant element type");
14812       }
14813     }
14814 
14815     // Create the merged constant value to store.
14816     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14817   }
14818 
14819   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14820   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14821 
14822   // Make sure we use a truncating store if that is required for legality.
14823   SDValue NewStore;
14824   if (!UseTrunc) {
14825     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14826                             FirstInChain->getPointerInfo(),
14827                             FirstInChain->getAlignment());
14828   } else { // Must be realized as a trunc store
14829     EVT LegalizedStoredValTy =
14830         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14831     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14832     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14833     SDValue ExtendedStoreVal =
14834         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14835                         LegalizedStoredValTy);
14836     NewStore = DAG.getTruncStore(
14837         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14838         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14839         FirstInChain->getAlignment(),
14840         FirstInChain->getMemOperand()->getFlags());
14841   }
14842 
14843   // Replace all merged stores with the new store.
14844   for (unsigned i = 0; i < NumStores; ++i)
14845     CombineTo(StoreNodes[i].MemNode, NewStore);
14846 
14847   AddToWorklist(NewChain.getNode());
14848   return true;
14849 }
14850 
14851 void DAGCombiner::getStoreMergeCandidates(
14852     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14853     SDNode *&RootNode) {
14854   // This holds the base pointer, index, and the offset in bytes from the base
14855   // pointer.
14856   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14857   EVT MemVT = St->getMemoryVT();
14858 
14859   SDValue Val = peekThroughBitcasts(St->getValue());
14860   // We must have a base and an offset.
14861   if (!BasePtr.getBase().getNode())
14862     return;
14863 
14864   // Do not handle stores to undef base pointers.
14865   if (BasePtr.getBase().isUndef())
14866     return;
14867 
14868   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14869   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14870                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14871   bool IsLoadSrc = isa<LoadSDNode>(Val);
14872   BaseIndexOffset LBasePtr;
14873   // Match on the load's base pointer if relevant.
14874   EVT LoadVT;
14875   if (IsLoadSrc) {
14876     auto *Ld = cast<LoadSDNode>(Val);
14877     LBasePtr = BaseIndexOffset::match(Ld, DAG);
14878     LoadVT = Ld->getMemoryVT();
14879     // Load and store should be the same type.
14880     if (MemVT != LoadVT)
14881       return;
14882     // Loads must only have one use.
14883     if (!Ld->hasNUsesOfValue(1, 0))
14884       return;
14885     // The memory operands must not be volatile.
14886     if (Ld->isVolatile() || Ld->isIndexed())
14887       return;
14888   }
14889   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14890                             int64_t &Offset) -> bool {
14891     if (Other->isVolatile() || Other->isIndexed())
14892       return false;
14893     SDValue Val = peekThroughBitcasts(Other->getValue());
14894     // Allow merging constants of different types as integers.
14895     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14896                                            : Other->getMemoryVT() != MemVT;
14897     if (IsLoadSrc) {
14898       if (NoTypeMatch)
14899         return false;
14900       // The Load's Base Ptr must also match
14901       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14902         auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14903         if (LoadVT != OtherLd->getMemoryVT())
14904           return false;
14905         // Loads must only have one use.
14906         if (!OtherLd->hasNUsesOfValue(1, 0))
14907           return false;
14908         // The memory operands must not be volatile.
14909         if (OtherLd->isVolatile() || OtherLd->isIndexed())
14910           return false;
14911         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14912           return false;
14913       } else
14914         return false;
14915     }
14916     if (IsConstantSrc) {
14917       if (NoTypeMatch)
14918         return false;
14919       if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14920         return false;
14921     }
14922     if (IsExtractVecSrc) {
14923       // Do not merge truncated stores here.
14924       if (Other->isTruncatingStore())
14925         return false;
14926       if (!MemVT.bitsEq(Val.getValueType()))
14927         return false;
14928       if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14929           Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14930         return false;
14931     }
14932     Ptr = BaseIndexOffset::match(Other, DAG);
14933     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14934   };
14935 
14936   // We are looking for a root node which is an ancestor to all mergeable
14937   // stores. We search up through a load, to our root, and then down
14938   // through all children. For instance, we will find Store{1,2,3} if
14939   // St is Store1, Store2, or Store3 where the root is not a load,
14940   // which is always true for nonvolatile ops. TODO: Expand
14941   // the search to find all valid candidates through multiple layers of loads.
14942   //
14943   // Root
14944   // |-------|-------|
14945   // Load    Load    Store3
14946   // |       |
14947   // Store1   Store2
14948   //
14949   // FIXME: We should be able to climb and
14950   // descend TokenFactors to find candidates as well.
14951 
14952   RootNode = St->getChain().getNode();
14953 
14954   unsigned NumNodesExplored = 0;
14955   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14956     RootNode = Ldn->getChain().getNode();
14957     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14958          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14959       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14960         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14961           if (I2.getOperandNo() == 0)
14962             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14963               BaseIndexOffset Ptr;
14964               int64_t PtrDiff;
14965               if (CandidateMatch(OtherST, Ptr, PtrDiff))
14966                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14967             }
14968   } else
14969     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14970          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14971       if (I.getOperandNo() == 0)
14972         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14973           BaseIndexOffset Ptr;
14974           int64_t PtrDiff;
14975           if (CandidateMatch(OtherST, Ptr, PtrDiff))
14976             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14977         }
14978 }
14979 
14980 // We need to check that merging these stores does not cause a loop in
14981 // the DAG. Any store candidate may depend on another candidate
14982 // indirectly through its operand (we already consider dependencies
14983 // through the chain). Check in parallel by searching up from
14984 // non-chain operands of candidates.
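//
// An illustrative cycle (a sketch): candidate Store1's stored value is
// computed from a load whose chain passes through candidate Store2, so
// merging the two would make the merged store a predecessor of itself.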
14985 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14986     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14987     SDNode *RootNode) {
14988   // FIXME: We should be able to truncate a full search of
14989   // predecessors by doing a BFS and keeping tabs on the originating
14990   // stores from which worklist nodes come, in a similar way to
14991   // TokenFactor simplification.
14992 
14993   SmallPtrSet<const SDNode *, 32> Visited;
14994   SmallVector<const SDNode *, 8> Worklist;
14995 
14996   // RootNode is a predecessor to all candidates so we need not search
14997   // past it. Add RootNode (peeking through TokenFactors). Do not count
14998   // these towards the size check.
14999 
15000   Worklist.push_back(RootNode);
15001   while (!Worklist.empty()) {
15002     auto N = Worklist.pop_back_val();
15003     if (!Visited.insert(N).second)
15004       continue; // Already present in Visited.
15005     if (N->getOpcode() == ISD::TokenFactor) {
15006       for (SDValue Op : N->ops())
15007         Worklist.push_back(Op.getNode());
15008     }
15009   }
15010 
15011   // Don't count pruning nodes towards max.
15012   unsigned int Max = 1024 + Visited.size();
15013   // Search Ops of store candidates.
15014   for (unsigned i = 0; i < NumStores; ++i) {
15015     SDNode *N = StoreNodes[i].MemNode;
15016     // Of the 4 Store Operands:
15017     //   * Chain (Op 0) -> We have already considered these
15018     //                    in candidate selection and can be
15019     //                    safely ignored
15020     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15021     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15022     //                       but aren't necessarily from the same base node, so
15023     //                       cycles are possible (e.g. via an indexed store).
15024     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15025     //               non-indexed stores). Not constant on all targets (e.g. ARM)
15026     //               and so can participate in a cycle.
15027     for (unsigned j = 1; j < N->getNumOperands(); ++j)
15028       Worklist.push_back(N->getOperand(j).getNode());
15029   }
15030   // Search through DAG. We can stop early if we find a store node.
15031   for (unsigned i = 0; i < NumStores; ++i)
15032     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15033                                      Max))
15034       return false;
15035   return true;
15036 }
15037 
15038 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15039   if (OptLevel == CodeGenOpt::None)
15040     return false;
15041 
15042   EVT MemVT = St->getMemoryVT();
15043   int64_t ElementSizeBytes = MemVT.getStoreSize();
15044   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15045 
15046   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15047     return false;
15048 
15049   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15050       Attribute::NoImplicitFloat);
15051 
15052   // This function cannot currently deal with non-byte-sized memory sizes.
15053   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15054     return false;
15055 
15056   if (!MemVT.isSimple())
15057     return false;
15058 
15059   // Perform an early exit check. Do not bother looking at stored values that
15060   // are not constants, loads, or extracted vector elements.
15061   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15062   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15063   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15064                        isa<ConstantFPSDNode>(StoredVal);
15065   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15066                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15067 
15068   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15069     return false;
15070 
15071   SmallVector<MemOpLink, 8> StoreNodes;
15072   SDNode *RootNode;
15073   // Find potential store merge candidates by searching the chain sub-DAG.
15074   getStoreMergeCandidates(St, StoreNodes, RootNode);
15075 
15076   // Check if there is anything to merge.
15077   if (StoreNodes.size() < 2)
15078     return false;
15079 
15080   // Sort the memory operands according to their distance from the
15081   // base pointer.
15082   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15083     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15084   });
15085 
15086   // Store merging attempts to merge the lowest-addressed stores first.
15087   // This generally works out well when successful, as the remaining stores
15088   // are checked after the first collection is merged. However, in the
15089   // case that a non-mergeable store is found first, e.g., {p[-2],
15090   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15091   // mergeable cases. To prevent this, we prune such stores from the
15092   // front of StoreNodes here.
15093 
15094   bool RV = false;
15095   while (StoreNodes.size() > 1) {
15096     unsigned StartIdx = 0;
15097     while ((StartIdx + 1 < StoreNodes.size()) &&
15098            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15099                StoreNodes[StartIdx + 1].OffsetFromBase)
15100       ++StartIdx;
15101 
15102     // Bail if we don't have enough candidates to merge.
15103     if (StartIdx + 1 >= StoreNodes.size())
15104       return RV;
15105 
15106     if (StartIdx)
15107       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15108 
15109     // Scan the memory operations on the chain and find the first
15110     // non-consecutive store memory address.
15111     unsigned NumConsecutiveStores = 1;
15112     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15113     // Check that the addresses are consecutive starting from the second
15114     // element in the list of stores.
15115     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15116       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15117       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15118         break;
15119       NumConsecutiveStores = i + 1;
15120     }
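    // A worked instance (illustrative): with ElementSizeBytes == 4 and
    // sorted offsets {0, 4, 8, 20}, the scan stops at offset 20 and
    // NumConsecutiveStores == 3.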
15121 
15122     if (NumConsecutiveStores < 2) {
15123       StoreNodes.erase(StoreNodes.begin(),
15124                        StoreNodes.begin() + NumConsecutiveStores);
15125       continue;
15126     }
15127 
15129     LLVMContext &Context = *DAG.getContext();
15130     const DataLayout &DL = DAG.getDataLayout();
15131 
15132     // Store the constants into memory as one consecutive store.
15133     if (IsConstantSrc) {
15134       while (NumConsecutiveStores >= 2) {
15135         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15136         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15137         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15138         unsigned LastLegalType = 1;
15139         unsigned LastLegalVectorType = 1;
15140         bool LastIntegerTrunc = false;
15141         bool NonZero = false;
15142         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15143         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15144           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15145           SDValue StoredVal = ST->getValue();
15146           bool IsElementZero = false;
15147           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15148             IsElementZero = C->isNullValue();
15149           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15150             IsElementZero = C->getConstantFPValue()->isNullValue();
15151           if (IsElementZero) {
15152             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15153               FirstZeroAfterNonZero = i;
15154           }
15155           NonZero |= !IsElementZero;
15156 
15157           // Find a legal type for the constant store.
15158           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15159           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15160           bool IsFast = false;
15161 
15162           // Break early when size is too large to be legal.
15163           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15164             break;
15165 
15166           if (TLI.isTypeLegal(StoreTy) &&
15167               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15168               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15169                                      FirstStoreAlign, &IsFast) &&
15170               IsFast) {
15171             LastIntegerTrunc = false;
15172             LastLegalType = i + 1;
15173             // Or check whether a truncstore is legal.
15174           } else if (TLI.getTypeAction(Context, StoreTy) ==
15175                      TargetLowering::TypePromoteInteger) {
15176             EVT LegalizedStoredValTy =
15177                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15178             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15179                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15180                 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15181                                        FirstStoreAlign, &IsFast) &&
15182                 IsFast) {
15183               LastIntegerTrunc = true;
15184               LastLegalType = i + 1;
15185             }
15186           }
15187 
15188           // We only use vectors if the constant is known to be zero or the
15189           // target allows it and the function is not marked with the
15190           // noimplicitfloat attribute.
15191           if ((!NonZero ||
15192                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15193               !NoVectors) {
15194             // Find a legal type for the vector store.
15195             unsigned Elts = (i + 1) * NumMemElts;
15196             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15197             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15198                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15199                 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15200                                        FirstStoreAlign, &IsFast) &&
15201                 IsFast)
15202               LastLegalVectorType = i + 1;
15203           }
15204         }
15205 
15206         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15207         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15208 
15209         // Check if we found a legal integer or vector type that creates a
15210         // meaningful merge.
15211         if (NumElem < 2) {
15212           // We know that candidate stores are in order and of correct
15213           // shape. While there is no mergeable sequence from the
15214           // beginning, one may start later in the sequence. The only
15215           // reason a merge of size N could have failed where another of
15216           // the same size would not have, is if the alignment has
15217           // improved or we've dropped a non-zero value. Drop as many
15218           // candidates as we can here.
15219           unsigned NumSkip = 1;
15220           while (
15221               (NumSkip < NumConsecutiveStores) &&
15222               (NumSkip < FirstZeroAfterNonZero) &&
15223               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15224             NumSkip++;
15225 
15226           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15227           NumConsecutiveStores -= NumSkip;
15228           continue;
15229         }
15230 
15231         // Check that we can merge these candidates without causing a cycle.
15232         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15233                                                       RootNode)) {
15234           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15235           NumConsecutiveStores -= NumElem;
15236           continue;
15237         }
15238 
15239         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15240                                               UseVector, LastIntegerTrunc);
15241 
15242         // Remove merged stores for next iteration.
15243         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15244         NumConsecutiveStores -= NumElem;
15245       }
15246       continue;
15247     }
15248 
15249     // When extracting multiple vector elements, try to store them
15250     // in one vector store rather than a sequence of scalar stores.
15251     if (IsExtractVecSrc) {
15252       // Loop on Consecutive Stores on success.
15253       while (NumConsecutiveStores >= 2) {
15254         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15255         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15256         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15257         unsigned NumStoresToMerge = 1;
15258         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15259           // Find a legal type for the vector store.
15260           unsigned Elts = (i + 1) * NumMemElts;
15261           EVT Ty =
15262               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15263           bool IsFast;
15264 
15265           // Break early when size is too large to be legal.
15266           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15267             break;
15268 
15269           if (TLI.isTypeLegal(Ty) &&
15270               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15271               TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15272                                      FirstStoreAlign, &IsFast) &&
15273               IsFast)
15274             NumStoresToMerge = i + 1;
15275         }
15276 
15277         // Check if we found a legal vector type creating a meaningful
15278         // merge.
15279         if (NumStoresToMerge < 2) {
15280           // We know that candidate stores are in order and of correct
15281           // shape. While there is no mergeable sequence from the
15282           // beginning, one may start later in the sequence. The only
15283           // reason a merge of size N could have failed where another of
15284           // the same size would not have, is if the alignment has
15285           // improved. Drop as many candidates as we can here.
15286           unsigned NumSkip = 1;
15287           while (
15288               (NumSkip < NumConsecutiveStores) &&
15289               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15290             NumSkip++;
15291 
15292           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15293           NumConsecutiveStores -= NumSkip;
15294           continue;
15295         }
15296 
15297         // Check that we can merge these candidates without causing a cycle.
15298         if (!checkMergeStoreCandidatesForDependencies(
15299                 StoreNodes, NumStoresToMerge, RootNode)) {
15300           StoreNodes.erase(StoreNodes.begin(),
15301                            StoreNodes.begin() + NumStoresToMerge);
15302           NumConsecutiveStores -= NumStoresToMerge;
15303           continue;
15304         }
15305 
15306         RV |= MergeStoresOfConstantsOrVecElts(
15307             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15308 
15309         StoreNodes.erase(StoreNodes.begin(),
15310                          StoreNodes.begin() + NumStoresToMerge);
15311         NumConsecutiveStores -= NumStoresToMerge;
15312       }
15313       continue;
15314     }
15315 
15316     // Below we handle the case of multiple consecutive stores that
15317     // come from multiple consecutive loads. We merge them into a single
15318     // wide load and a single wide store.
15319 
15320     // Look for load nodes which are used by the stored values.
15321     SmallVector<MemOpLink, 8> LoadNodes;
15322 
15323     // Find acceptable loads. Loads need to have the same chain (token factor),
15324   // must not be zext loads, volatile, or indexed, and they must be consecutive.
15325     BaseIndexOffset LdBasePtr;
15326 
15327     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15328       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15329       SDValue Val = peekThroughBitcasts(St->getValue());
15330       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15331 
15332       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15333       // If this is not the first pointer that we check, the base must match.
15334       int64_t LdOffset = 0;
15335       if (LdBasePtr.getBase().getNode()) {
15336         // The base ptr must be the same.
15337         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15338           break;
15339       } else {
15340         // Remember the first base pointer; all later ones must match it.
15341         LdBasePtr = LdPtr;
15342       }
15343 
15344       // We found a potential memory operand to merge.
15345       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15346     }
15347 
15348     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15349       // If we have load/store pair instructions and we only have two values,
15350       // don't bother merging.
15351       unsigned RequiredAlignment;
15352       if (LoadNodes.size() == 2 &&
15353           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15354           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15355         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15356         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15357         break;
15358       }
15359       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15360       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15361       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15362       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15363       unsigned FirstLoadAS = FirstLoad->getAddressSpace();
15364       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15365 
15366       // Scan the memory operations on the chain and find the first
15367       // non-consecutive load memory address. This variable holds the index
15368       // into the load node array.
15369 
15370       unsigned LastConsecutiveLoad = 1;
15371 
15372       // These variables refer to sizes, not indices in the array.
15373       unsigned LastLegalVectorType = 1;
15374       unsigned LastLegalIntegerType = 1;
15375       bool isDereferenceable = true;
15376       bool DoIntegerTruncate = false;
15377       StartAddress = LoadNodes[0].OffsetFromBase;
15378       SDValue FirstChain = FirstLoad->getChain();
15379       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15380         // All loads must share the same chain.
15381         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15382           break;
15383 
15384         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15385         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15386           break;
15387         LastConsecutiveLoad = i;
15388 
15389         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15390           isDereferenceable = false;
15391 
15392         // Find a legal type for the vector store.
15393         unsigned Elts = (i + 1) * NumMemElts;
15394         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15395 
15396         // Break early when size is too large to be legal.
15397         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15398           break;
15399 
15400         bool IsFastSt, IsFastLd;
15401         if (TLI.isTypeLegal(StoreTy) &&
15402             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15403             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15404                                    FirstStoreAlign, &IsFastSt) &&
15405             IsFastSt &&
15406             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15407                                    FirstLoadAlign, &IsFastLd) &&
15408             IsFastLd) {
15409           LastLegalVectorType = i + 1;
15410         }
15411 
15412         // Find a legal type for the integer store.
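        // For example, two consecutive i32 stores form a 64-bit candidate,
        // which becomes a single i64 store where i64 is legal.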
15413         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15414         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15415         if (TLI.isTypeLegal(StoreTy) &&
15416             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15417             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15418                                    FirstStoreAlign, &IsFastSt) &&
15419             IsFastSt &&
15420             TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15421                                    FirstLoadAlign, &IsFastLd) &&
15422             IsFastLd) {
15423           LastLegalIntegerType = i + 1;
15424           DoIntegerTruncate = false;
15425           // Or check whether a truncstore and extload is legal.
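          // For example, a 48-bit candidate (three i16 stores) is typically
          // promoted to i64; the merge is still possible using an i48
          // extending load paired with an i48 truncating store.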
15426         } else if (TLI.getTypeAction(Context, StoreTy) ==
15427                    TargetLowering::TypePromoteInteger) {
15428           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15429           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15430               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15431               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15432                                  StoreTy) &&
15433               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15434                                  StoreTy) &&
15435               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15436               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15437                                      FirstStoreAlign, &IsFastSt) &&
15438               IsFastSt &&
15439               TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15440                                      FirstLoadAlign, &IsFastLd) &&
15441               IsFastLd) {
15442             LastLegalIntegerType = i + 1;
15443             DoIntegerTruncate = true;
15444           }
15445         }
15446       }
15447 
15448       // Only use vector types if the vector type is larger than the integer
15449       // type. If they are the same, use integers.
15450       bool UseVectorTy =
15451           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15452       unsigned LastLegalType =
15453           std::max(LastLegalVectorType, LastLegalIntegerType);
15454 
      // We add +1 here because the LastXXX variables refer to an index
      // (location) while NumElem refers to a count (array size).
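      // For example, with 6 consecutive stores, 4 consecutive loads, and a
      // widest legal type covering 4 elements, NumElem is 4.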
15457       unsigned NumElem =
15458           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15459       NumElem = std::min(LastLegalType, NumElem);
15460 
15461       if (NumElem < 2) {
        // We know that candidate stores are in order and of correct
        // shape. While there is no mergeable sequence from the
        // beginning, one may start later in the sequence. The only
        // reason a merge of size N could have failed where another of
        // the same size would not have is if the alignment of either
        // the load or the store has improved. Drop as many candidates as
        // we can here.
15469         unsigned NumSkip = 1;
15470         while ((NumSkip < LoadNodes.size()) &&
15471                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15472                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15473           NumSkip++;
15474         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15475         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15476         NumConsecutiveStores -= NumSkip;
15477         continue;
15478       }
15479 
15480       // Check that we can merge these candidates without causing a cycle.
15481       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15482                                                     RootNode)) {
15483         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15484         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15485         NumConsecutiveStores -= NumElem;
15486         continue;
15487       }
15488 
15489       // Find if it is better to use vectors or integers to load and store
15490       // to memory.
15491       EVT JointMemOpVT;
15492       if (UseVectorTy) {
15493         // Find a legal type for the vector store.
15494         unsigned Elts = NumElem * NumMemElts;
15495         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15496       } else {
15497         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15498         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15499       }
15500 
15501       SDLoc LoadDL(LoadNodes[0].MemNode);
15502       SDLoc StoreDL(StoreNodes[0].MemNode);
15503 
15504       // The merged loads are required to have the same incoming chain, so
15505       // using the first's chain is acceptable.
15506 
15507       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15508       AddToWorklist(NewStoreChain.getNode());
15509 
15510       MachineMemOperand::Flags MMOFlags =
15511           isDereferenceable ? MachineMemOperand::MODereferenceable
15512                             : MachineMemOperand::MONone;
15513 
15514       SDValue NewLoad, NewStore;
15515       if (UseVectorTy || !DoIntegerTruncate) {
15516         NewLoad =
15517             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15518                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15519                         FirstLoadAlign, MMOFlags);
15520         NewStore = DAG.getStore(
15521             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15522             FirstInChain->getPointerInfo(), FirstStoreAlign);
15523       } else { // This must be the truncstore/extload case
15524         EVT ExtendedTy =
15525             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15526         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15527                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15528                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15529                                  FirstLoadAlign, MMOFlags);
15530         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15531                                      FirstInChain->getBasePtr(),
15532                                      FirstInChain->getPointerInfo(),
15533                                      JointMemOpVT, FirstInChain->getAlignment(),
15534                                      FirstInChain->getMemOperand()->getFlags());
15535       }
15536 
15537       // Transfer chain users from old loads to the new load.
15538       for (unsigned i = 0; i < NumElem; ++i) {
15539         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15540         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15541                                       SDValue(NewLoad.getNode(), 1));
15542       }
15543 
      // Replace all the stores with the new store. Recursively remove the
      // corresponding values if they are no longer used.
15546       for (unsigned i = 0; i < NumElem; ++i) {
15547         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15548         CombineTo(StoreNodes[i].MemNode, NewStore);
15549         if (Val.getNode()->use_empty())
15550           recursivelyDeleteUnusedNodes(Val.getNode());
15551       }
15552 
15553       RV = true;
15554       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15555       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15556       NumConsecutiveStores -= NumElem;
15557     }
15558   }
15559   return RV;
15560 }
15561 
15562 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15563   SDLoc SL(ST);
15564   SDValue ReplStore;
15565 
15566   // Replace the chain to avoid dependency.
15567   if (ST->isTruncatingStore()) {
15568     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15569                                   ST->getBasePtr(), ST->getMemoryVT(),
15570                                   ST->getMemOperand());
15571   } else {
15572     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15573                              ST->getMemOperand());
15574   }
15575 
15576   // Create token to keep both nodes around.
15577   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15578                               MVT::Other, ST->getChain(), ReplStore);
15579 
15580   // Make sure the new and old chains are cleaned up.
15581   AddToWorklist(Token.getNode());
15582 
15583   // Don't add users to work list.
15584   return CombineTo(ST, Token, false);
15585 }
15586 
15587 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15588   SDValue Value = ST->getValue();
15589   if (Value.getOpcode() == ISD::TargetConstantFP)
15590     return SDValue();
15591 
15592   SDLoc DL(ST);
15593 
15594   SDValue Chain = ST->getChain();
15595   SDValue Ptr = ST->getBasePtr();
15596 
15597   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15598 
15599   // NOTE: If the original store is volatile, this transform must not increase
15600   // the number of stores.  For example, on x86-32 an f64 can be stored in one
15601   // processor operation but an i64 (which is not legal) requires two.  So the
15602   // transform should not be done in this case.
15603 
15604   SDValue Tmp;
15605   switch (CFP->getSimpleValueType(0).SimpleTy) {
15606   default:
15607     llvm_unreachable("Unknown FP type");
15608   case MVT::f16:    // We don't do this for these yet.
15609   case MVT::f80:
15610   case MVT::f128:
15611   case MVT::ppcf128:
15612     return SDValue();
15613   case MVT::f32:
15614     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15615         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15616       ;
15617       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15618                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15619                             MVT::i32);
15620       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15621     }
15622 
15623     return SDValue();
15624   case MVT::f64:
15625     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15626          !ST->isVolatile()) ||
15627         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15628       ;
15629       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15630                             getZExtValue(), SDLoc(CFP), MVT::i64);
15631       return DAG.getStore(Chain, DL, Tmp,
15632                           Ptr, ST->getMemOperand());
15633     }
15634 
15635     if (!ST->isVolatile() &&
15636         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15637       // Many FP stores are not made apparent until after legalize, e.g. for
15638       // argument passing.  Since this is so common, custom legalize the
15639       // 64-bit integer store into two 32-bit stores.
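      // For example, an f64 constant 1.0 has bit pattern 0x3FF0000000000000;
      // on a little-endian target it becomes a store of 0x00000000 at Ptr and
      // a store of 0x3FF00000 at Ptr+4.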
15640       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15641       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15642       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15643       if (DAG.getDataLayout().isBigEndian())
15644         std::swap(Lo, Hi);
15645 
15646       unsigned Alignment = ST->getAlignment();
15647       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15648       AAMDNodes AAInfo = ST->getAAInfo();
15649 
15650       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15651                                  ST->getAlignment(), MMOFlags, AAInfo);
15652       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15653                         DAG.getConstant(4, DL, Ptr.getValueType()));
15654       Alignment = MinAlign(Alignment, 4U);
15655       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15656                                  ST->getPointerInfo().getWithOffset(4),
15657                                  Alignment, MMOFlags, AAInfo);
15658       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15659                          St0, St1);
15660     }
15661 
15662     return SDValue();
15663   }
15664 }
15665 
15666 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15667   StoreSDNode *ST  = cast<StoreSDNode>(N);
15668   SDValue Chain = ST->getChain();
15669   SDValue Value = ST->getValue();
15670   SDValue Ptr   = ST->getBasePtr();
15671 
15672   // If this is a store of a bit convert, store the input value if the
15673   // resultant store does not need a higher alignment than the original.
15674   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15675       ST->isUnindexed()) {
15676     EVT SVT = Value.getOperand(0).getValueType();
15677     // If the store is volatile, we only want to change the store type if the
15678     // resulting store is legal. Otherwise we might increase the number of
15679     // memory accesses. We don't care if the original type was legal or not
15680     // as we assume software couldn't rely on the number of accesses of an
15681     // illegal type.
15682     if (((!LegalOperations && !ST->isVolatile()) ||
15683          TLI.isOperationLegal(ISD::STORE, SVT)) &&
15684         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15685       unsigned OrigAlign = ST->getAlignment();
15686       bool Fast = false;
15687       if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15688                                  ST->getAddressSpace(), OrigAlign, &Fast) &&
15689           Fast) {
15690         return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15691                             ST->getPointerInfo(), OrigAlign,
15692                             ST->getMemOperand()->getFlags(), ST->getAAInfo());
15693       }
15694     }
15695   }
15696 
15697   // Turn 'store undef, Ptr' -> nothing.
15698   if (Value.isUndef() && ST->isUnindexed())
15699     return Chain;
15700 
15701   // Try to infer better alignment information than the store already has.
15702   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15703     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15704       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15705         SDValue NewStore =
15706             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15707                               ST->getMemoryVT(), Align,
15708                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
15710         assert(NewStore.getNode() == N);
15711         (void)NewStore;
15712       }
15713     }
15714   }
15715 
15716   // Try transforming a pair floating point load / store ops to integer
15717   // load / store ops.
15718   if (SDValue NewST = TransformFPLoadStorePair(N))
15719     return NewST;
15720 
15721   if (ST->isUnindexed()) {
15722     // Walk up chain skipping non-aliasing memory nodes, on this store and any
15723     // adjacent stores.
15724     if (findBetterNeighborChains(ST)) {
15725       // replaceStoreChain uses CombineTo, which handled all of the worklist
15726       // manipulation. Return the original node to not do anything else.
15727       return SDValue(ST, 0);
15728     }
15729     Chain = ST->getChain();
15730   }
15731 
15732   // FIXME: is there such a thing as a truncating indexed store?
15733   if (ST->isTruncatingStore() && ST->isUnindexed() &&
15734       Value.getValueType().isInteger() &&
15735       (!isa<ConstantSDNode>(Value) ||
15736        !cast<ConstantSDNode>(Value)->isOpaque())) {
15737     APInt TruncDemandedBits =
15738         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15739                              ST->getMemoryVT().getScalarSizeInBits());
15740 
15741     // See if we can simplify the input to this truncstore with knowledge that
15742     // only the low bits are being used.  For example:
15743     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
15744     SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
15745     AddToWorklist(Value.getNode());
15746     if (Shorter)
15747       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
15748                                ST->getMemOperand());
15749 
15750     // Otherwise, see if we can simplify the operation with
15751     // SimplifyDemandedBits, which only works if the value has a single use.
15752     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the store node itself.
15757       if (N->getOpcode() != ISD::DELETED_NODE)
15758         AddToWorklist(N);
15759       return SDValue(N, 0);
15760     }
15761   }
15762 
15763   // If this is a load followed by a store to the same location, then the store
15764   // is dead/noop.
15765   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15766     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15767         ST->isUnindexed() && !ST->isVolatile() &&
15768         // There can't be any side effects between the load and store, such as
15769         // a call or store.
15770         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15771       // The store is dead, remove it.
15772       return Chain;
15773     }
15774   }
15775 
15776   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15777     if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15778         !ST1->isVolatile()) {
15779       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
15780           ST->getMemoryVT() == ST1->getMemoryVT()) {
15781         // If this is a store followed by a store with the same value to the
15782         // same location, then the store is dead/noop.
15783         return Chain;
15784       }
15785 
15786       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15787           !ST1->getBasePtr().isUndef()) {
15788         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
15789         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
15790         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
15791         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If the preceding store (ST1) writes to a subset of the current
        // store's location and no other node is chained to it, the preceding
        // store is dead and we can effectively drop it. Do not remove stores
        // to undef as they may be used as data sinks.
15796         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
15797           CombineTo(ST1, ST1->getChain());
15798           return SDValue();
15799         }
15800 
15801         // If ST stores to a subset of preceding store's write set, we may be
15802         // able to fold ST's value into the preceding stored value. As we know
15803         // the other uses of ST1's chain are unconcerned with ST, this folding
15804         // will not affect those nodes.
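        // For example (little-endian), an i32 store of 0x12345678 followed by
        // an i16 store of 0xABCD to the same address folds into a single i32
        // store of 0x1234ABCD.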
15805         int64_t BitOffset;
15806         if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
15807                                BitOffset)) {
15808           SDValue ChainValue = ST1->getValue();
15809           if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
15810             if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
15811               APInt Val = C1->getAPIntValue();
15812               APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
15813               // FIXME: Handle Big-endian mode.
15814               if (!DAG.getDataLayout().isBigEndian()) {
15815                 Val.insertBits(InsertVal, BitOffset);
15816                 SDValue NewSDVal =
15817                     DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
15818                                     C1->isTargetOpcode(), C1->isOpaque());
15819                 SDNode *NewST1 = DAG.UpdateNodeOperands(
15820                     ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
15821                     ST1->getOperand(3));
15822                 return CombineTo(ST, SDValue(NewST1, 0));
15823               }
15824             }
15825           }
15826         } // End ST subset of ST1 case.
15827       }
15828     }
15829   }
15830 
15831   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15832   // truncating store.  We can do this even if this is already a truncstore.
15833   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15834       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15835       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15836                             ST->getMemoryVT())) {
15837     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15838                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
15839   }
15840 
15841   // Always perform this optimization before types are legal. If the target
15842   // prefers, also try this after legalization to catch stores that were created
15843   // by intrinsics or other nodes.
15844   if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15845     while (true) {
15846       // There can be multiple store sequences on the same chain.
15847       // Keep trying to merge store sequences until we are unable to do so
15848       // or until we merge the last store on the chain.
15849       bool Changed = MergeConsecutiveStores(ST);
15850       if (!Changed) break;
      // Return N, as the merge only uses CombineTo and no worklist
      // cleanup is necessary.
15853       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15854         return SDValue(N, 0);
15855     }
15856   }
15857 
15858   // Try transforming N to an indexed store.
15859   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15860     return SDValue(N, 0);
15861 
15862   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15863   //
15864   // Make sure to do this only after attempting to merge stores in order to
15865   //  avoid changing the types of some subset of stores due to visit order,
15866   //  preventing their merging.
15867   if (isa<ConstantFPSDNode>(ST->getValue())) {
15868     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15869       return NewSt;
15870   }
15871 
15872   if (SDValue NewSt = splitMergedValStore(ST))
15873     return NewSt;
15874 
15875   return ReduceLoadOpStoreWidth(N);
15876 }
15877 
15878 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
15879   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
15880   if (!LifetimeEnd->hasOffset())
15881     return SDValue();
15882 
15883   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
15884                                         LifetimeEnd->getOffset(), false);
15885 
15886   // We walk up the chains to find stores.
15887   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
15888   while (!Chains.empty()) {
15889     SDValue Chain = Chains.back();
15890     Chains.pop_back();
15891     if (!Chain.hasOneUse())
15892       continue;
15893     switch (Chain.getOpcode()) {
15894     case ISD::TokenFactor:
15895       for (unsigned Nops = Chain.getNumOperands(); Nops;)
15896         Chains.push_back(Chain.getOperand(--Nops));
15897       break;
15898     case ISD::LIFETIME_START:
15899     case ISD::LIFETIME_END:
15900       // We can forward past any lifetime start/end that can be proven not to
15901       // alias the node.
15902       if (!isAlias(Chain.getNode(), N))
15903         Chains.push_back(Chain.getOperand(0));
15904       break;
15905     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
15907       if (ST->isVolatile() || ST->isIndexed())
15908         continue;
15909       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
15910       // If we store purely within object bounds just before its lifetime ends,
15911       // we can remove the store.
15912       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
15913                                    ST->getMemoryVT().getStoreSizeInBits())) {
15914         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
15915                    dbgs() << "\nwithin LIFETIME_END of : ";
15916                    LifetimeEndBase.dump(); dbgs() << "\n");
15917         CombineTo(ST, ST->getChain());
15918         return SDValue(N, 0);
15919       }
15920     }
15921     }
15922   }
15923   return SDValue();
15924 }
15925 
15926 /// For the instruction sequence of store below, F and I values
15927 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
15929 /// which can remove the bitwise instructions or sink them to colder places.
15930 ///
15931 ///   (store (or (zext (bitcast F to i32) to i64),
15932 ///              (shl (zext I to i64), 32)), addr)  -->
15933 ///   (store F, addr) and (store I, addr+4)
15934 ///
15935 /// Similarly, splitting for other merged store can also be beneficial, like:
15936 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15937 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15938 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15939 /// For pair of {i16, i8},  i32 store --> two i16 stores.
15940 /// For pair of {i8, i8},   i16 store --> two i8 stores.
15941 ///
15942 /// We allow each target to determine specifically which kind of splitting is
15943 /// supported.
15944 ///
/// The store patterns are commonly seen from the simple code snippet below,
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo().
///   void goo(const std::pair<int, float> &);
///   void hoo() {
15949 ///     ...
15950 ///     goo(std::make_pair(tmp, ftmp));
15951 ///     ...
15952 ///   }
15953 ///
15954 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15955   if (OptLevel == CodeGenOpt::None)
15956     return SDValue();
15957 
15958   SDValue Val = ST->getValue();
15959   SDLoc DL(ST);
15960 
15961   // Match OR operand.
15962   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15963     return SDValue();
15964 
15965   // Match SHL operand and get Lower and Higher parts of Val.
15966   SDValue Op1 = Val.getOperand(0);
15967   SDValue Op2 = Val.getOperand(1);
15968   SDValue Lo, Hi;
15969   if (Op1.getOpcode() != ISD::SHL) {
15970     std::swap(Op1, Op2);
15971     if (Op1.getOpcode() != ISD::SHL)
15972       return SDValue();
15973   }
15974   Lo = Op2;
15975   Hi = Op1.getOperand(0);
15976   if (!Op1.hasOneUse())
15977     return SDValue();
15978 
15979   // Match shift amount to HalfValBitSize.
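  // (For an i64 stored value, HalfValBitSize is 32, so the shl amount must
  // be exactly 32.)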
15980   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15981   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15982   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15983     return SDValue();
15984 
  // Lo and Hi must each be zero-extended from an integer type whose size is
  // at most HalfValBitSize.
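  // For example, splitting an i64 store requires both Lo and Hi to be
  // zero-extended from types of at most 32 bits.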
15987   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15988       !Lo.getOperand(0).getValueType().isScalarInteger() ||
15989       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15990       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15991       !Hi.getOperand(0).getValueType().isScalarInteger() ||
15992       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15993     return SDValue();
15994 
15995   // Use the EVT of low and high parts before bitcast as the input
15996   // of target query.
15997   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15998                   ? Lo.getOperand(0).getValueType()
15999                   : Lo.getValueType();
16000   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16001                    ? Hi.getOperand(0).getValueType()
16002                    : Hi.getValueType();
16003   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16004     return SDValue();
16005 
16006   // Start to split store.
16007   unsigned Alignment = ST->getAlignment();
16008   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16009   AAMDNodes AAInfo = ST->getAAInfo();
16010 
16011   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16012   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16013   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16014   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16015 
16016   SDValue Chain = ST->getChain();
16017   SDValue Ptr = ST->getBasePtr();
16018   // Lower value store.
16019   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16020                              ST->getAlignment(), MMOFlags, AAInfo);
16021   Ptr =
16022       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16023                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16024   // Higher value store.
16025   SDValue St1 =
16026       DAG.getStore(St0, DL, Hi, Ptr,
16027                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16028                    Alignment / 2, MMOFlags, AAInfo);
16029   return St1;
16030 }
16031 
16032 /// Convert a disguised subvector insertion into a shuffle:
16033 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
16034 /// bitcast(shuffle (bitcast V), (extended X), Mask)
16035 /// Note: We do not use an insert_subvector node because that requires a legal
16036 /// subvector type.
16037 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16038   SDValue InsertVal = N->getOperand(1);
16039   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16040       !InsertVal.getOperand(0).getValueType().isVector())
16041     return SDValue();
16042 
16043   SDValue SubVec = InsertVal.getOperand(0);
16044   SDValue DestVec = N->getOperand(0);
16045   EVT SubVecVT = SubVec.getValueType();
16046   EVT VT = DestVec.getValueType();
16047   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16048   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16049   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16050 
16051   // Step 1: Create a shuffle mask that implements this insert operation. The
16052   // vector that we are inserting into will be operand 0 of the shuffle, so
16053   // those elements are just 'i'. The inserted subvector is in the first
16054   // positions of operand 1 of the shuffle. Example:
16055   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16056   SmallVector<int, 16> Mask(NumMaskVals);
16057   for (unsigned i = 0; i != NumMaskVals; ++i) {
16058     if (i / NumSrcElts == InsIndex)
16059       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16060     else
16061       Mask[i] = i;
16062   }
16063 
16064   // Bail out if the target can not handle the shuffle we want to create.
16065   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16066   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16067   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16068     return SDValue();
16069 
16070   // Step 2: Create a wide vector from the inserted source vector by appending
16071   // undefined elements. This is the same size as our destination vector.
16072   SDLoc DL(N);
16073   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16074   ConcatOps[0] = SubVec;
16075   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16076 
16077   // Step 3: Shuffle in the padded subvector.
16078   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16079   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16080   AddToWorklist(PaddedSubV.getNode());
16081   AddToWorklist(DestVecBC.getNode());
16082   AddToWorklist(Shuf.getNode());
16083   return DAG.getBitcast(VT, Shuf);
16084 }
16085 
16086 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16087   SDValue InVec = N->getOperand(0);
16088   SDValue InVal = N->getOperand(1);
16089   SDValue EltNo = N->getOperand(2);
16090   SDLoc DL(N);
16091 
16092   // If the inserted element is an UNDEF, just use the input vector.
16093   if (InVal.isUndef())
16094     return InVec;
16095 
16096   EVT VT = InVec.getValueType();
16097   unsigned NumElts = VT.getVectorNumElements();
16098 
16099   // Remove redundant insertions:
16100   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16101   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16102       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16103     return InVec;
16104 
16105   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16106   if (!IndexC) {
    // If this is a variable insert into an undef vector, it might be better
    // to splat:
16108     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16109     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16110       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16111       return DAG.getBuildVector(VT, DL, Ops);
16112     }
16113     return SDValue();
16114   }
16115 
16116   // We must know which element is being inserted for folds below here.
16117   unsigned Elt = IndexC->getZExtValue();
16118   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16119     return Shuf;
16120 
16121   // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, V0, Idx0), V1, Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, V1, Idx1), V0, Idx0)
  //
  // Do this only if the child insert_vector_elt node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
16128   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16129       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16130     unsigned OtherElt = InVec.getConstantOperandVal(2);
16131     if (Elt < OtherElt) {
16132       // Swap nodes.
16133       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16134                                   InVec.getOperand(0), InVal, EltNo);
16135       AddToWorklist(NewOp.getNode());
16136       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16137                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16138     }
16139   }
16140 
16141   // If we can't generate a legal BUILD_VECTOR, exit
16142   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16143     return SDValue();
16144 
16145   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16146   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16147   // vector elements.
16148   SmallVector<SDValue, 8> Ops;
16149   // Do not combine these two vectors if the output vector will not replace
16150   // the input vector.
16151   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16152     Ops.append(InVec.getNode()->op_begin(),
16153                InVec.getNode()->op_end());
16154   } else if (InVec.isUndef()) {
16155     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16156   } else {
16157     return SDValue();
16158   }
16159   assert(Ops.size() == NumElts && "Unexpected vector size");
16160 
16161   // Insert the element
16162   if (Elt < Ops.size()) {
16163     // All the operands of BUILD_VECTOR must have the same type;
16164     // we enforce that here.
16165     EVT OpVT = Ops[0].getValueType();
16166     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16167   }
16168 
16169   // Return the new vector
16170   return DAG.getBuildVector(VT, DL, Ops);
16171 }
16172 
16173 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16174                                                   SDValue EltNo,
16175                                                   LoadSDNode *OriginalLoad) {
16176   assert(!OriginalLoad->isVolatile());
16177 
16178   EVT ResultVT = EVE->getValueType(0);
16179   EVT VecEltVT = InVecVT.getVectorElementType();
16180   unsigned Align = OriginalLoad->getAlignment();
16181   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16182       VecEltVT.getTypeForEVT(*DAG.getContext()));
16183 
16184   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16185     return SDValue();
16186 
16187   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16188     ISD::NON_EXTLOAD : ISD::EXTLOAD;
16189   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16190     return SDValue();
16191 
16192   Align = NewAlign;
16193 
16194   SDValue NewPtr = OriginalLoad->getBasePtr();
16195   SDValue Offset;
16196   EVT PtrType = NewPtr.getValueType();
16197   MachinePointerInfo MPI;
16198   SDLoc DL(EVE);
16199   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
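    // With a constant index the byte offset is known statically (e.g. element
    // 2 of a v4f32 load sits at byte offset 8), so the pointer info can carry
    // the exact offset.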
16200     int Elt = ConstEltNo->getZExtValue();
16201     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16202     Offset = DAG.getConstant(PtrOff, DL, PtrType);
16203     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16204   } else {
16205     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16206     Offset = DAG.getNode(
16207         ISD::MUL, DL, PtrType, Offset,
16208         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16209     // Discard the pointer info except the address space because the memory
16210     // operand can't represent this new access since the offset is variable.
16211     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16212   }
16213   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16214 
16215   // The replacement we need to do here is a little tricky: we need to
16216   // replace an extractelement of a load with a load.
16217   // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractelement is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
16221   SDValue Load;
16222   SDValue Chain;
16223   if (ResultVT.bitsGT(VecEltVT)) {
16224     // If the result type of vextract is wider than the load, then issue an
16225     // extending load instead.
16226     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16227                                                   VecEltVT)
16228                                    ? ISD::ZEXTLOAD
16229                                    : ISD::EXTLOAD;
16230     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16231                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16232                           Align, OriginalLoad->getMemOperand()->getFlags(),
16233                           OriginalLoad->getAAInfo());
16234     Chain = Load.getValue(1);
16235   } else {
16236     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16237                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16238                        OriginalLoad->getAAInfo());
16239     Chain = Load.getValue(1);
16240     if (ResultVT.bitsLT(VecEltVT))
16241       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16242     else
16243       Load = DAG.getBitcast(ResultVT, Load);
16244   }
16245   WorklistRemover DeadNodes(*this);
16246   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16247   SDValue To[] = { Load, Chain };
16248   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16249   // Since we're explicitly calling ReplaceAllUses, add the new node to the
16250   // worklist explicitly as well.
16251   AddToWorklist(Load.getNode());
16252   AddUsersToWorklist(Load.getNode()); // Add users too
16253   // Make sure to revisit this node to clean it up; it will usually be dead.
16254   AddToWorklist(EVE);
16255   ++OpsNarrowed;
16256   return SDValue(EVE, 0);
16257 }
16258 
16259 /// Transform a vector binary operation into a scalar binary operation by moving
16260 /// the math/logic after an extract element of a vector.
16261 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16262                                        bool LegalOperations) {
16263   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16264   SDValue Vec = ExtElt->getOperand(0);
16265   SDValue Index = ExtElt->getOperand(1);
16266   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16267   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16268       Vec.getNode()->getNumValues() != 1)
16269     return SDValue();
16270 
16271   // Targets may want to avoid this to prevent an expensive register transfer.
16272   if (!TLI.shouldScalarizeBinop(Vec))
16273     return SDValue();
16274 
16275   // Extracting an element of a vector constant is constant-folded, so this
16276   // transform is just replacing a vector op with a scalar op while moving the
16277   // extract.
16278   SDValue Op0 = Vec.getOperand(0);
16279   SDValue Op1 = Vec.getOperand(1);
16280   if (isAnyConstantBuildVector(Op0, true) ||
16281       isAnyConstantBuildVector(Op1, true)) {
16282     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16283     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16284     SDLoc DL(ExtElt);
16285     EVT VT = ExtElt->getValueType(0);
16286     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16287     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16288     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16289   }
16290 
16291   return SDValue();
16292 }
16293 
16294 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16295   SDValue VecOp = N->getOperand(0);
16296   SDValue Index = N->getOperand(1);
16297   EVT ScalarVT = N->getValueType(0);
16298   EVT VecVT = VecOp.getValueType();
16299   if (VecOp.isUndef())
16300     return DAG.getUNDEF(ScalarVT);
16301 
16302   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16303   //
16304   // This only really matters if the index is non-constant since other combines
16305   // on the constant elements already work.
16306   SDLoc DL(N);
16307   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16308       Index == VecOp.getOperand(2)) {
16309     SDValue Elt = VecOp.getOperand(1);
16310     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16311   }
16312 
  // (vextract (scalar_to_vector val), 0) -> val
16314   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16315     // Check if the result type doesn't match the inserted element type. A
16316     // SCALAR_TO_VECTOR may truncate the inserted element and the
16317     // EXTRACT_VECTOR_ELT may widen the extracted vector.
16318     SDValue InOp = VecOp.getOperand(0);
16319     if (InOp.getValueType() != ScalarVT) {
16320       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16321       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16322     }
16323     return InOp;
16324   }
16325 
16326   // extract_vector_elt of out-of-bounds element -> UNDEF
16327   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16328   unsigned NumElts = VecVT.getVectorNumElements();
16329   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16330     return DAG.getUNDEF(ScalarVT);
16331 
16332   // extract_vector_elt (build_vector x, y), 1 -> y
16333   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16334       TLI.isTypeLegal(VecVT) &&
16335       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16336     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16337     EVT InEltVT = Elt.getValueType();
16338 
16339     // Sometimes build_vector's scalar input types do not match result type.
16340     if (ScalarVT == InEltVT)
16341       return Elt;
16342 
16343     // TODO: It may be useful to truncate if free if the build_vector implicitly
16344     // converts.
16345   }
16346 
16347   // TODO: These transforms should not require the 'hasOneUse' restriction, but
16348   // there are regressions on multiple targets without it. We can end up with a
16349   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16350   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16351       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
16353     bool IsLE = DAG.getDataLayout().isLittleEndian();
16354     unsigned ExtractIndex = IndexC->getZExtValue();
16355     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16356     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16357     SDValue BCSrc = VecOp.getOperand(0);
16358     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16359       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16360 
16361     if (LegalTypes && BCSrc.getValueType().isInteger() &&
16362         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16363       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16364       // trunc i64 X to i32
16365       SDValue X = BCSrc.getOperand(0);
16366       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16367              "Extract element and scalar to vector can't change element type "
16368              "from FP to integer.");
16369       unsigned XBitWidth = X.getValueSizeInBits();
16370       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16371       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16372 
16373       // An extract element return value type can be wider than its vector
16374       // operand element type. In that case, the high bits are undefined, so
16375       // it's possible that we may need to extend rather than truncate.
16376       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16377         assert(XBitWidth % VecEltBitWidth == 0 &&
16378                "Scalar bitwidth must be a multiple of vector element bitwidth");
16379         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16380       }
16381     }
16382   }
16383 
16384   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16385     return BO;
16386 
16387   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns (for example, on AVX, extracting elements from a wide vector
  // without using extract_subvector). However, if we can find an underlying
  // scalar value, then we can always use that.
16393   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16394     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16395     // Find the new index to extract from.
16396     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16397 
16398     // Extracting an undef index is undef.
16399     if (OrigElt == -1)
16400       return DAG.getUNDEF(ScalarVT);
16401 
16402     // Select the right vector half to extract from.
16403     SDValue SVInVec;
16404     if (OrigElt < (int)NumElts) {
16405       SVInVec = VecOp.getOperand(0);
16406     } else {
16407       SVInVec = VecOp.getOperand(1);
16408       OrigElt -= NumElts;
16409     }
16410 
16411     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16412       SDValue InOp = SVInVec.getOperand(OrigElt);
16413       if (InOp.getValueType() != ScalarVT) {
16414         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16415         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16416       }
16417 
16418       return InOp;
16419     }
16420 
16421     // FIXME: We should handle recursing on other vector shuffles and
16422     // scalar_to_vector here as well.
16423 
16424     if (!LegalOperations ||
16425         // FIXME: Should really be just isOperationLegalOrCustom.
16426         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16427         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16428       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16429       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16430                          DAG.getConstant(OrigElt, DL, IndexTy));
16431     }
16432   }
16433 
16434   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16435   // simplify it based on the (valid) extraction indices.
16436   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16437         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16438                Use->getOperand(0) == VecOp &&
16439                isa<ConstantSDNode>(Use->getOperand(1));
16440       })) {
16441     APInt DemandedElts = APInt::getNullValue(NumElts);
16442     for (SDNode *Use : VecOp->uses()) {
16443       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16444       if (CstElt->getAPIntValue().ult(NumElts))
16445         DemandedElts.setBit(CstElt->getZExtValue());
16446     }
16447     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16448       // We simplified the vector operand of this extract element. If this
16449       // extract is not dead, visit it again so it is folded properly.
16450       if (N->getOpcode() != ISD::DELETED_NODE)
16451         AddToWorklist(N);
16452       return SDValue(N, 0);
16453     }
16454   }
16455 
16456   // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
16459   bool BCNumEltsChanged = false;
16460   EVT ExtVT = VecVT.getVectorElementType();
16461   EVT LVT = ExtVT;
16462   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16463     return SDValue();
16464 
16465   if (VecOp.getOpcode() == ISD::BITCAST) {
16466     // Don't duplicate a load with other uses.
16467     if (!VecOp.hasOneUse())
16468       return SDValue();
16469 
16470     EVT BCVT = VecOp.getOperand(0).getValueType();
16471     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16472       return SDValue();
16473     if (NumElts != BCVT.getVectorNumElements())
16474       BCNumEltsChanged = true;
16475     VecOp = VecOp.getOperand(0);
16476     ExtVT = BCVT.getVectorElementType();
16477   }
16478 
16479   // extract (vector load $addr), i --> load $addr + i * size
16480   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16481       ISD::isNormalLoad(VecOp.getNode()) &&
16482       !Index->hasPredecessor(VecOp.getNode())) {
16483     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16484     if (VecLoad && !VecLoad->isVolatile())
16485       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16486   }
16487 
16488   // Perform only after legalization to ensure build_vector / vector_shuffle
16489   // optimizations have already been done.
16490   if (!LegalOperations || !IndexC)
16491     return SDValue();
16492 
16493   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16494   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16495   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16496   int Elt = IndexC->getZExtValue();
16497   LoadSDNode *LN0 = nullptr;
16498   if (ISD::isNormalLoad(VecOp.getNode())) {
16499     LN0 = cast<LoadSDNode>(VecOp);
16500   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16501              VecOp.getOperand(0).getValueType() == ExtVT &&
16502              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16503     // Don't duplicate a load with other uses.
16504     if (!VecOp.hasOneUse())
16505       return SDValue();
16506 
16507     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16508   }
16509   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16510     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16511     // =>
16512     // (load $addr+1*size)
16513 
16514     // Don't duplicate a load with other uses.
16515     if (!VecOp.hasOneUse())
16516       return SDValue();
16517 
16518     // If the bit convert changed the number of elements, it is unsafe
16519     // to examine the mask.
16520     if (BCNumEltsChanged)
16521       return SDValue();
16522 
16523     // Select the input vector, guarding against out of range extract vector.
16524     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16525     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16526 
16527     if (VecOp.getOpcode() == ISD::BITCAST) {
16528       // Don't duplicate a load with other uses.
16529       if (!VecOp.hasOneUse())
16530         return SDValue();
16531 
16532       VecOp = VecOp.getOperand(0);
16533     }
16534     if (ISD::isNormalLoad(VecOp.getNode())) {
16535       LN0 = cast<LoadSDNode>(VecOp);
16536       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16537       Index = DAG.getConstant(Elt, DL, Index.getValueType());
16538     }
16539   }
16540 
16541   // Make sure we found a non-volatile load and the extractelement is
16542   // the only use.
16543   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
16544     return SDValue();
16545 
16546   // If Idx was -1 above, Elt is going to be -1, so just return undef.
16547   if (Elt == -1)
16548     return DAG.getUNDEF(LVT);
16549 
16550   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16551 }
16552 
16553 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
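// For example, on a little-endian target:
//   (v4i32 build_vector (zext i16 a), (zext i16 b), (zext i16 c), (zext i16 d))
// becomes
//   (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))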
16554 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16555   // We perform this optimization post type-legalization because
16556   // the type-legalizer often scalarizes integer-promoted vectors.
16557   // Performing this optimization before may create bit-casts which
16558   // will be type-legalized to complex code sequences.
16559   // We perform this optimization only before the operation legalizer because we
16560   // may introduce illegal operations.
16561   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16562     return SDValue();
16563 
16564   unsigned NumInScalars = N->getNumOperands();
16565   SDLoc DL(N);
16566   EVT VT = N->getValueType(0);
16567 
16568   // Check to see if this is a BUILD_VECTOR of a bunch of values
16569   // which come from any_extend or zero_extend nodes. If so, we can create
16570   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16571   // optimizations. We do not handle sign-extend because we can't fill the sign
16572   // using shuffles.
16573   EVT SourceType = MVT::Other;
16574   bool AllAnyExt = true;
16575 
16576   for (unsigned i = 0; i != NumInScalars; ++i) {
16577     SDValue In = N->getOperand(i);
16578     // Ignore undef inputs.
16579     if (In.isUndef()) continue;
16580 
16581     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
16582     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16583 
16584     // Abort if the element is not an extension.
16585     if (!ZeroExt && !AnyExt) {
16586       SourceType = MVT::Other;
16587       break;
16588     }
16589 
16590     // The input is a ZeroExt or AnyExt. Check the original type.
16591     EVT InTy = In.getOperand(0).getValueType();
16592 
16593     // Check that all of the widened source types are the same.
16594     if (SourceType == MVT::Other)
16595       // First time.
16596       SourceType = InTy;
16597     else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
16599       SourceType = MVT::Other;
16600       break;
16601     }
16602 
16603     // Check if all of the extends are ANY_EXTENDs.
16604     AllAnyExt &= AnyExt;
16605   }
16606 
16607   // In order to have valid types, all of the inputs must be extended from the
16608   // same source type and all of the inputs must be any or zero extend.
16609   // Scalar sizes must be a power of two.
16610   EVT OutScalarTy = VT.getScalarType();
16611   bool ValidTypes = SourceType != MVT::Other &&
16612                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16613                  isPowerOf2_32(SourceType.getSizeInBits());
16614 
16615   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16616   // turn into a single shuffle instruction.
16617   if (!ValidTypes)
16618     return SDValue();
16619 
16620   bool isLE = DAG.getDataLayout().isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits() / SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType)
                             : DAG.getConstant(0, DL, SourceType);
16625 
16626   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16627   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16628 
16629   // Populate the new build_vector
16630   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16631     SDValue Cast = N->getOperand(i);
16632     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16633             Cast.getOpcode() == ISD::ZERO_EXTEND ||
16634             Cast.isUndef()) && "Invalid cast opcode");
16635     SDValue In;
16636     if (Cast.isUndef())
16637       In = DAG.getUNDEF(SourceType);
16638     else
16639       In = Cast->getOperand(0);
16640     unsigned Index = isLE ? (i * ElemRatio) :
16641                             (i * ElemRatio + (ElemRatio - 1));
16642 
16643     assert(Index < Ops.size() && "Invalid index");
16644     Ops[Index] = In;
16645   }
16646 
16647   // The type of the new BUILD_VECTOR node.
16648   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16649   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16650          "Invalid vector size");
16651   // Check if the new vector type is legal.
16652   if (!isTypeLegal(VecVT) ||
16653       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16654        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16655     return SDValue();
16656 
16657   // Make the new BUILD_VECTOR.
16658   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16659 
16660   // The new BUILD_VECTOR node has the potential to be further optimized.
16661   AddToWorklist(BV.getNode());
16662   // Bitcast to the desired type.
16663   return DAG.getBitcast(VT, BV);
16664 }
16665 
16666 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16667                                            ArrayRef<int> VectorMask,
16668                                            SDValue VecIn1, SDValue VecIn2,
16669                                            unsigned LeftIdx, bool DidSplitVec) {
16670   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16671   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16672 
16673   EVT VT = N->getValueType(0);
16674   EVT InVT1 = VecIn1.getValueType();
16675   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16676 
16677   unsigned NumElems = VT.getVectorNumElements();
16678   unsigned ShuffleNumElems = NumElems;
16679 
16680   // If we artificially split a vector in two already, then the offsets in the
16681   // operands will all be based off of VecIn1, even those in VecIn2.
16682   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
16683 
16684   // We can't generate a shuffle node with mismatched input and output types.
16685   // Try to make the types match the type of the output.
16686   if (InVT1 != VT || InVT2 != VT) {
16687     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
      // If the output vector size is a multiple of the (identical) input
      // vector sizes, we can concatenate the inputs and pad the rest with
      // undefs.
16690       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16691       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16692       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16693       ConcatOps[0] = VecIn1;
16694       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16695       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16696       VecIn2 = SDValue();
16697     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16698       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16699         return SDValue();
16700 
16701       if (!VecIn2.getNode()) {
16702         // If we only have one input vector, and it's twice the size of the
16703         // output, split it in two.
16704         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16705                              DAG.getConstant(NumElems, DL, IdxTy));
16706         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16707         // Since we now have shorter input vectors, adjust the offset of the
16708         // second vector's start.
16709         Vec2Offset = NumElems;
16710       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16711         // VecIn1 is wider than the output, and we have another, possibly
16712         // smaller input. Pad the smaller input with undefs, shuffle at the
16713         // input vector width, and extract the output.
        // The shuffle type is different from VT, so check legality again.
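        // For example (illustrative types): with VT = v4i32, InVT1 = v8i32,
        // and InVT2 = v4i32, widen VecIn2 to v8i32 with insert_subvector,
        // shuffle the two v8i32 inputs, and extract the low v4i32 half of
        // the result.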
16715         if (LegalOperations &&
16716             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16717           return SDValue();
16718 
16719         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16720         // lower it back into a BUILD_VECTOR. So if the inserted type is
16721         // illegal, don't even try.
16722         if (InVT1 != InVT2) {
16723           if (!TLI.isTypeLegal(InVT2))
16724             return SDValue();
16725           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16726                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16727         }
16728         ShuffleNumElems = NumElems * 2;
16729       } else {
16730         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16731         // than VecIn1. We can't handle this for now - this case will disappear
16732         // when we start sorting the vectors by type.
16733         return SDValue();
16734       }
16735     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16736                InVT1.getSizeInBits() == VT.getSizeInBits()) {
16737       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16738       ConcatOps[0] = VecIn2;
16739       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16740     } else {
16741       // TODO: Support cases where the length mismatch isn't exactly by a
16742       // factor of 2.
16743       // TODO: Move this check upwards, so that if we have bad type
16744       // mismatches, we don't create any DAG nodes.
16745       return SDValue();
16746     }
16747   }
16748 
16749   // Initialize mask to undef.
16750   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16751 
16752   // Only need to run up to the number of elements actually used, not the
16753   // total number of elements in the shuffle - if we are shuffling a wider
16754   // vector, the high lanes should be set to undef.
16755   for (unsigned i = 0; i != NumElems; ++i) {
16756     if (VectorMask[i] <= 0)
16757       continue;
16758 
16759     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16760     if (VectorMask[i] == (int)LeftIdx) {
16761       Mask[i] = ExtIndex;
16762     } else if (VectorMask[i] == (int)LeftIdx + 1) {
16763       Mask[i] = Vec2Offset + ExtIndex;
16764     }
16765   }
16766 
  // The type of the input vectors may have changed above.
16768   InVT1 = VecIn1.getValueType();
16769 
16770   // If we already have a VecIn2, it should have the same type as VecIn1.
16771   // If we don't, get an undef/zero vector of the appropriate type.
16772   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16773   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16774 
16775   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16776   if (ShuffleNumElems > NumElems)
16777     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16778 
16779   return Shuffle;
16780 }
16781 
16782 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16783   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16784 
16785   // First, determine where the build vector is not undef.
16786   // TODO: We could extend this to handle zero elements as well as undefs.
16787   int NumBVOps = BV->getNumOperands();
16788   int ZextElt = -1;
16789   for (int i = 0; i != NumBVOps; ++i) {
16790     SDValue Op = BV->getOperand(i);
16791     if (Op.isUndef())
16792       continue;
16793     if (ZextElt == -1)
16794       ZextElt = i;
16795     else
16796       return SDValue();
16797   }
16798   // Bail out if there's no non-undef element.
16799   if (ZextElt == -1)
16800     return SDValue();
16801 
16802   // The build vector contains some number of undef elements and exactly
16803   // one other element. That other element must be a zero-extended scalar
16804   // extracted from a vector at a constant index to turn this into a shuffle.
16805   // Also, require that the build vector does not implicitly truncate/extend
16806   // its elements.
16807   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
16808   EVT VT = BV->getValueType(0);
16809   SDValue Zext = BV->getOperand(ZextElt);
16810   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16811       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16812       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16813       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16814     return SDValue();
16815 
  // The zero-extend width must be a multiple of the extracted element width,
  // and the vector we extract from must be the same size as the build vector
  // result.
16818   SDValue Extract = Zext.getOperand(0);
16819   unsigned DestSize = Zext.getValueSizeInBits();
16820   unsigned SrcSize = Extract.getValueSizeInBits();
16821   if (DestSize % SrcSize != 0 ||
16822       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16823     return SDValue();
16824 
16825   // Create a shuffle mask that will combine the extracted element with zeros
16826   // and undefs.
16827   int ZextRatio = DestSize / SrcSize;
16828   int NumMaskElts = NumBVOps * ZextRatio;
16829   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16830   for (int i = 0; i != NumMaskElts; ++i) {
16831     if (i / ZextRatio == ZextElt) {
16832       // The low bits of the (potentially translated) extracted element map to
16833       // the source vector. The high bits map to zero. We will use a zero vector
16834       // as the 2nd source operand of the shuffle, so use the 1st element of
16835       // that vector (mask value is number-of-elements) for the high bits.
16836       if (i % ZextRatio == 0)
16837         ShufMask[i] = Extract.getConstantOperandVal(1);
16838       else
16839         ShufMask[i] = NumMaskElts;
16840     }
16841 
16842     // Undef elements of the build vector remain undef because we initialize
16843     // the shuffle mask with -1.
16844   }
16845 
16846   // Turn this into a shuffle with zero if that's legal.
16847   EVT VecVT = Extract.getOperand(0).getValueType();
16848   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16849     return SDValue();
16850 
16851   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16852   // bitcast (shuffle V, ZeroVec, VectorMask)
16853   SDLoc DL(BV);
16854   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16855   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16856                                       ShufMask);
16857   return DAG.getBitcast(VT, Shuf);
16858 }
16859 
16860 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16861 // operations. If the types of the vectors we're extracting from allow it,
16862 // turn this into a vector_shuffle node.
16863 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16864   SDLoc DL(N);
16865   EVT VT = N->getValueType(0);
16866 
16867   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16868   if (!isTypeLegal(VT))
16869     return SDValue();
16870 
16871   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16872     return V;
16873 
16874   // May only combine to shuffle after legalize if shuffle is legal.
16875   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16876     return SDValue();
16877 
16878   bool UsesZeroVector = false;
16879   unsigned NumElems = N->getNumOperands();
16880 
16881   // Record, for each element of the newly built vector, which input vector
16882   // that element comes from. -1 stands for undef, 0 for the zero vector,
16883   // and positive values for the input vectors.
16884   // VectorMask maps each element to its vector number, and VecIn maps vector
16885   // numbers to their initial SDValues.
16886 
16887   SmallVector<int, 8> VectorMask(NumElems, -1);
16888   SmallVector<SDValue, 8> VecIn;
16889   VecIn.push_back(SDValue());
16890 
16891   for (unsigned i = 0; i != NumElems; ++i) {
16892     SDValue Op = N->getOperand(i);
16893 
16894     if (Op.isUndef())
16895       continue;
16896 
16897     // See if we can use a blend with a zero vector.
16898     // TODO: Should we generalize this to a blend with an arbitrary constant
16899     // vector?
16900     if (isNullConstant(Op) || isNullFPConstant(Op)) {
16901       UsesZeroVector = true;
16902       VectorMask[i] = 0;
16903       continue;
16904     }
16905 
16906     // Not an undef or zero. If the input is something other than an
16907     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16908     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16909         !isa<ConstantSDNode>(Op.getOperand(1)))
16910       return SDValue();
16911     SDValue ExtractedFromVec = Op.getOperand(0);
16912 
16913     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
16914     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16915       return SDValue();
16916 
16917     // All inputs must have the same element type as the output.
16918     if (VT.getVectorElementType() !=
16919         ExtractedFromVec.getValueType().getVectorElementType())
16920       return SDValue();
16921 
16922     // Have we seen this input vector before?
16923     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16924     // a map back from SDValues to numbers isn't worth it.
16925     unsigned Idx = std::distance(
16926         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16927     if (Idx == VecIn.size())
16928       VecIn.push_back(ExtractedFromVec);
16929 
16930     VectorMask[i] = Idx;
16931   }
16932 
16933   // If we didn't find at least one input vector, bail out.
16934   if (VecIn.size() < 2)
16935     return SDValue();
16936 
  // If all the operands of the BUILD_VECTOR extract from the same
  // vector, then split that vector based on the maximum vector access
  // index and adjust VectorMask and VecIn accordingly.
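  // For example (illustrative): a v4i32 build vector whose operands all
  // extract from a single v16i32 source with maximum index 11 is split into
  // two v8i32 halves; each VectorMask entry is then remapped to half 1
  // (extract index < 8) or half 2 (extract index >= 8).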
16941   bool DidSplitVec = false;
16942   if (VecIn.size() == 2) {
16943     unsigned MaxIndex = 0;
16944     unsigned NearestPow2 = 0;
16945     SDValue Vec = VecIn.back();
16946     EVT InVT = Vec.getValueType();
16947     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16948     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16949 
16950     for (unsigned i = 0; i < NumElems; i++) {
16951       if (VectorMask[i] <= 0)
16952         continue;
16953       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16954       IndexVec[i] = Index;
16955       MaxIndex = std::max(MaxIndex, Index);
16956     }
16957 
16958     NearestPow2 = PowerOf2Ceil(MaxIndex);
16959     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16960         NumElems * 2 < NearestPow2) {
16961       unsigned SplitSize = NearestPow2 / 2;
16962       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16963                                      InVT.getVectorElementType(), SplitSize);
16964       if (TLI.isTypeLegal(SplitVT)) {
16965         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16966                                      DAG.getConstant(SplitSize, DL, IdxTy));
16967         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16968                                      DAG.getConstant(0, DL, IdxTy));
16969         VecIn.pop_back();
16970         VecIn.push_back(VecIn1);
16971         VecIn.push_back(VecIn2);
16972         DidSplitVec = true;
16973 
16974         for (unsigned i = 0; i < NumElems; i++) {
16975           if (VectorMask[i] <= 0)
16976             continue;
16977           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16978         }
16979       }
16980     }
16981   }
16982 
16983   // TODO: We want to sort the vectors by descending length, so that adjacent
16984   // pairs have similar length, and the longer vector is always first in the
16985   // pair.
16986 
16987   // TODO: Should this fire if some of the input vectors has illegal type (like
16988   // it does now), or should we let legalization run its course first?
16989 
16990   // Shuffle phase:
16991   // Take pairs of vectors, and shuffle them so that the result has elements
16992   // from these vectors in the correct places.
16993   // For example, given:
16994   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16995   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16996   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16997   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16998   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16999   // We will generate:
17000   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17001   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17002   SmallVector<SDValue, 4> Shuffles;
17003   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17004     unsigned LeftIdx = 2 * In + 1;
17005     SDValue VecLeft = VecIn[LeftIdx];
17006     SDValue VecRight =
17007         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17008 
17009     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17010                                                 VecRight, LeftIdx, DidSplitVec))
17011       Shuffles.push_back(Shuffle);
17012     else
17013       return SDValue();
17014   }
17015 
17016   // If we need the zero vector as an "ingredient" in the blend tree, add it
17017   // to the list of shuffles.
17018   if (UsesZeroVector)
17019     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17020                                       : DAG.getConstantFP(0.0, DL, VT));
17021 
17022   // If we only have one shuffle, we're done.
17023   if (Shuffles.size() == 1)
17024     return Shuffles[0];
17025 
17026   // Update the vector mask to point to the post-shuffle vectors.
17027   for (int &Vec : VectorMask)
17028     if (Vec == 0)
17029       Vec = Shuffles.size() - 1;
17030     else
17031       Vec = (Vec - 1) / 2;
17032 
17033   // More than one shuffle. Generate a binary tree of blends, e.g. if from
17034   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17035   // generate:
17036   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17037   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17038   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17039   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17040   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17041   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17042   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17043 
17044   // Make sure the initial size of the shuffle list is even.
17045   if (Shuffles.size() % 2)
17046     Shuffles.push_back(DAG.getUNDEF(VT));
17047 
17048   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17049     if (CurSize % 2) {
17050       Shuffles[CurSize] = DAG.getUNDEF(VT);
17051       CurSize++;
17052     }
17053     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17054       int Left = 2 * In;
17055       int Right = 2 * In + 1;
17056       SmallVector<int, 8> Mask(NumElems, -1);
17057       for (unsigned i = 0; i != NumElems; ++i) {
17058         if (VectorMask[i] == Left) {
17059           Mask[i] = i;
17060           VectorMask[i] = In;
17061         } else if (VectorMask[i] == Right) {
17062           Mask[i] = i + NumElems;
17063           VectorMask[i] = In;
17064         }
17065       }
17066 
17067       Shuffles[In] =
17068           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17069     }
17070   }
17071   return Shuffles[0];
17072 }
17073 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
17076 // TODO: Support sign extend?
17077 // TODO: Allow undef elements?
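// For example (a sketch with illustrative types):
//   (v4i32 (build_vector (zext (extractelt (v8i16 X), 4)), ...,
//                        (zext (extractelt (v8i16 X), 7))))
//     --> (v4i32 (zero_extend (v4i16 (extract_subvector (v8i16 X), 4))))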
17078 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17079   if (LegalOperations)
17080     return SDValue();
17081 
17082   EVT VT = N->getValueType(0);
17083 
17084   bool FoundZeroExtend = false;
17085   SDValue Op0 = N->getOperand(0);
17086   auto checkElem = [&](SDValue Op) -> int64_t {
17087     unsigned Opc = Op.getOpcode();
17088     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
    if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17090         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17091         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17092       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17093         return C->getZExtValue();
17094     return -1;
17095   };
17096 
17097   // Make sure the first element matches
17098   // (zext (extract_vector_elt X, C))
17099   int64_t Offset = checkElem(Op0);
17100   if (Offset < 0)
17101     return SDValue();
17102 
17103   unsigned NumElems = N->getNumOperands();
17104   SDValue In = Op0.getOperand(0).getOperand(0);
17105   EVT InSVT = In.getValueType().getScalarType();
17106   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17107 
17108   // Don't create an illegal input type after type legalization.
17109   if (LegalTypes && !TLI.isTypeLegal(InVT))
17110     return SDValue();
17111 
17112   // Ensure all the elements come from the same vector and are adjacent.
17113   for (unsigned i = 1; i != NumElems; ++i) {
17114     if ((Offset + i) != checkElem(N->getOperand(i)))
17115       return SDValue();
17116   }
17117 
17118   SDLoc DL(N);
17119   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17120                    Op0.getOperand(0).getOperand(1));
17121   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17122                      VT, In);
17123 }
17124 
17125 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17126   EVT VT = N->getValueType(0);
17127 
17128   // A vector built entirely of undefs is undef.
17129   if (ISD::allOperandsUndef(N))
17130     return DAG.getUNDEF(VT);
17131 
  // If this is a splat of a bitcast from another vector, change to a
  // concat_vectors.
17134   // For example:
17135   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17136   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17137   //
17138   // If X is a build_vector itself, the concat can become a larger build_vector.
17139   // TODO: Maybe this is useful for non-splat too?
17140   if (!LegalOperations) {
17141     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17142       Splat = peekThroughBitcasts(Splat);
17143       EVT SrcVT = Splat.getValueType();
17144       if (SrcVT.isVector()) {
17145         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17146         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17147                                      SrcVT.getVectorElementType(), NumElts);
17148         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17149           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17150           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17151                                        NewVT, Ops);
17152           return DAG.getBitcast(VT, Concat);
17153         }
17154       }
17155     }
17156   }
17157 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
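  // For example (illustrative):
  //   (v4i32 (build_vector (extractelt (v8i32 V), 4), (extractelt V, 5),
  //                        (extractelt V, 6), (extractelt V, 7)))
  //     --> (v4i32 (extract_subvector (v8i32 V), 4))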
17159   if (!LegalTypes && (N->getNumOperands() > 1)) {
17160     SDValue Op0 = N->getOperand(0);
    auto checkElem = [&](SDValue Op) -> int64_t {
17162       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17163           (Op0.getOperand(0) == Op.getOperand(0)))
17164         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17165           return CNode->getZExtValue();
17166       return -1;
17167     };
17168 
    int64_t Offset = checkElem(Op0);
17170     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17171       if (Offset + i != checkElem(N->getOperand(i))) {
17172         Offset = -1;
17173         break;
17174       }
17175     }
17176 
17177     if ((Offset == 0) &&
17178         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17179       return Op0.getOperand(0);
    // The extract index must be a multiple of the output vector length.
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) == 0))
17183       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17184                          Op0.getOperand(0), Op0.getOperand(1));
17185   }
17186 
17187   if (SDValue V = convertBuildVecZextToZext(N))
17188     return V;
17189 
17190   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17191     return V;
17192 
17193   if (SDValue V = reduceBuildVecToShuffle(N))
17194     return V;
17195 
17196   return SDValue();
17197 }
17198 
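// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into a BUILD_VECTOR of those scalars, bitcast to the result type.
// For example (illustrative, assuming v2i32 is not a legal type on the
// target):
//   (concat_vectors (v2i32 (bitcast (i64 A))), (v2i32 (bitcast (i64 B))))
//     --> (v4i32 (bitcast (v2i64 (build_vector A, B))))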
17199 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17200   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17201   EVT OpVT = N->getOperand(0).getValueType();
17202 
17203   // If the operands are legal vectors, leave them alone.
17204   if (TLI.isTypeLegal(OpVT))
17205     return SDValue();
17206 
17207   SDLoc DL(N);
17208   EVT VT = N->getValueType(0);
17209   SmallVector<SDValue, 8> Ops;
17210 
17211   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17212   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17213 
17214   // Keep track of what we encounter.
17215   bool AnyInteger = false;
17216   bool AnyFP = false;
17217   for (const SDValue &Op : N->ops()) {
17218     if (ISD::BITCAST == Op.getOpcode() &&
17219         !Op.getOperand(0).getValueType().isVector())
17220       Ops.push_back(Op.getOperand(0));
17221     else if (ISD::UNDEF == Op.getOpcode())
17222       Ops.push_back(ScalarUndef);
17223     else
17224       return SDValue();
17225 
17226     // Note whether we encounter an integer or floating point scalar.
17227     // If it's neither, bail out, it could be something weird like x86mmx.
17228     EVT LastOpVT = Ops.back().getValueType();
17229     if (LastOpVT.isFloatingPoint())
17230       AnyFP = true;
17231     else if (LastOpVT.isInteger())
17232       AnyInteger = true;
17233     else
17234       return SDValue();
17235   }
17236 
17237   // If any of the operands is a floating point scalar bitcast to a vector,
17238   // use floating point types throughout, and bitcast everything.
17239   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17240   if (AnyFP) {
17241     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17242     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17243     if (AnyInteger) {
17244       for (SDValue &Op : Ops) {
17245         if (Op.getValueType() == SVT)
17246           continue;
17247         if (Op.isUndef())
17248           Op = ScalarUndef;
17249         else
17250           Op = DAG.getBitcast(SVT, Op);
17251       }
17252     }
17253   }
17254 
17255   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17256                                VT.getSizeInBits() / SVT.getSizeInBits());
17257   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17258 }
17259 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors of the same size as the result, attempt to turn
// this into a legal shuffle.
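// For example (illustrative, assuming the mask is legal for the target):
//   (concat_vectors (extract_subvector (v8i32 X), 0),
//                   (extract_subvector (v8i32 Y), 4))
//     --> (vector_shuffle<0,1,2,3,12,13,14,15> X, Y)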
17264 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17265   EVT VT = N->getValueType(0);
17266   EVT OpVT = N->getOperand(0).getValueType();
17267   int NumElts = VT.getVectorNumElements();
17268   int NumOpElts = OpVT.getVectorNumElements();
17269 
17270   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17271   SmallVector<int, 8> Mask;
17272 
17273   for (SDValue Op : N->ops()) {
17274     Op = peekThroughBitcasts(Op);
17275 
17276     // UNDEF nodes convert to UNDEF shuffle mask values.
17277     if (Op.isUndef()) {
17278       Mask.append((unsigned)NumOpElts, -1);
17279       continue;
17280     }
17281 
17282     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17283       return SDValue();
17284 
17285     // What vector are we extracting the subvector from and at what index?
17286     SDValue ExtVec = Op.getOperand(0);
17287 
17288     // We want the EVT of the original extraction to correctly scale the
17289     // extraction index.
17290     EVT ExtVT = ExtVec.getValueType();
17291     ExtVec = peekThroughBitcasts(ExtVec);
17292 
17293     // UNDEF nodes convert to UNDEF shuffle mask values.
17294     if (ExtVec.isUndef()) {
17295       Mask.append((unsigned)NumOpElts, -1);
17296       continue;
17297     }
17298 
17299     if (!isa<ConstantSDNode>(Op.getOperand(1)))
17300       return SDValue();
17301     int ExtIdx = Op.getConstantOperandVal(1);
17302 
17303     // Ensure that we are extracting a subvector from a vector the same
17304     // size as the result.
17305     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17306       return SDValue();
17307 
17308     // Scale the subvector index to account for any bitcast.
17309     int NumExtElts = ExtVT.getVectorNumElements();
17310     if (0 == (NumExtElts % NumElts))
17311       ExtIdx /= (NumExtElts / NumElts);
17312     else if (0 == (NumElts % NumExtElts))
17313       ExtIdx *= (NumElts / NumExtElts);
17314     else
17315       return SDValue();
17316 
17317     // At most we can reference 2 inputs in the final shuffle.
17318     if (SV0.isUndef() || SV0 == ExtVec) {
17319       SV0 = ExtVec;
17320       for (int i = 0; i != NumOpElts; ++i)
17321         Mask.push_back(i + ExtIdx);
17322     } else if (SV1.isUndef() || SV1 == ExtVec) {
17323       SV1 = ExtVec;
17324       for (int i = 0; i != NumOpElts; ++i)
17325         Mask.push_back(i + ExtIdx + NumElts);
17326     } else {
17327       return SDValue();
17328     }
17329   }
17330 
17331   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
17332     return SDValue();
17333 
17334   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17335                               DAG.getBitcast(VT, SV1), Mask);
17336 }
17337 
17338 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17339   // If we only have one input vector, we don't need to do any concatenation.
17340   if (N->getNumOperands() == 1)
17341     return N->getOperand(0);
17342 
17343   // Check if all of the operands are undefs.
17344   EVT VT = N->getValueType(0);
17345   if (ISD::allOperandsUndef(N))
17346     return DAG.getUNDEF(VT);
17347 
17348   // Optimize concat_vectors where all but the first of the vectors are undef.
17349   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17350         return Op.isUndef();
17351       })) {
17352     SDValue In = N->getOperand(0);
17353     assert(In.getValueType().isVector() && "Must concat vectors");
17354 
17355     SDValue Scalar = peekThroughOneUseBitcasts(In);
17356 
17357     // concat_vectors(scalar_to_vector(scalar), undef) ->
17358     //     scalar_to_vector(scalar)
17359     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17360          Scalar.hasOneUse()) {
17361       EVT SVT = Scalar.getValueType().getVectorElementType();
17362       if (SVT == Scalar.getOperand(0).getValueType())
17363         Scalar = Scalar.getOperand(0);
17364     }
17365 
17366     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17367     if (!Scalar.getValueType().isVector()) {
17368       // If the bitcast type isn't legal, it might be a trunc of a legal type;
17369       // look through the trunc so we can still do the transform:
17370       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17371       if (Scalar->getOpcode() == ISD::TRUNCATE &&
17372           !TLI.isTypeLegal(Scalar.getValueType()) &&
17373           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17374         Scalar = Scalar->getOperand(0);
17375 
17376       EVT SclTy = Scalar.getValueType();
17377 
17378       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17379         return SDValue();
17380 
17381       // Bail out if the vector size is not a multiple of the scalar size.
17382       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17383         return SDValue();
17384 
17385       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17386       if (VNTNumElms < 2)
17387         return SDValue();
17388 
17389       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17390       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17391         return SDValue();
17392 
17393       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17394       return DAG.getBitcast(VT, Res);
17395     }
17396   }
17397 
17398   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17399   // We have already tested above for an UNDEF only concatenation.
17400   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17401   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17402   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17403     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17404   };
17405   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17406     SmallVector<SDValue, 8> Opnds;
17407     EVT SVT = VT.getScalarType();
17408 
17409     EVT MinVT = SVT;
17410     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTOR operands are built from integers, they may have
      // different operand types. Get the smallest type and truncate all
      // operands to it.
17413       bool FoundMinVT = false;
17414       for (const SDValue &Op : N->ops())
17415         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17416           EVT OpSVT = Op.getOperand(0).getValueType();
17417           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17418           FoundMinVT = true;
17419         }
17420       assert(FoundMinVT && "Concat vector type mismatch");
17421     }
17422 
17423     for (const SDValue &Op : N->ops()) {
17424       EVT OpVT = Op.getValueType();
17425       unsigned NumElts = OpVT.getVectorNumElements();
17426 
17427       if (ISD::UNDEF == Op.getOpcode())
17428         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17429 
17430       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17431         if (SVT.isFloatingPoint()) {
17432           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17433           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17434         } else {
17435           for (unsigned i = 0; i != NumElts; ++i)
17436             Opnds.push_back(
17437                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17438         }
17439       }
17440     }
17441 
17442     assert(VT.getVectorNumElements() == Opnds.size() &&
17443            "Concat vector type mismatch");
17444     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17445   }
17446 
17447   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17448   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17449     return V;
17450 
17451   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17452   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17453     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17454       return V;
17455 
  // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
  // nodes often generate nop CONCAT_VECTORS nodes.
  // Scan the CONCAT_VECTORS operands and look for concat operations that
  // place the incoming vectors at the exact same location.
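  // For example (illustrative):
  //   (v8i32 (concat_vectors (extract_subvector (v8i32 X), 0),
  //                          (extract_subvector (v8i32 X), 4)))
  //     --> X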
17460   SDValue SingleSource = SDValue();
17461   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17462 
17463   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17464     SDValue Op = N->getOperand(i);
17465 
17466     if (Op.isUndef())
17467       continue;
17468 
17469     // Check if this is the identity extract:
17470     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17471       return SDValue();
17472 
17473     // Find the single incoming vector for the extract_subvector.
17474     if (SingleSource.getNode()) {
17475       if (Op.getOperand(0) != SingleSource)
17476         return SDValue();
17477     } else {
17478       SingleSource = Op.getOperand(0);
17479 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
17483       if (SingleSource.getValueType() != N->getValueType(0))
17484         return SDValue();
17485     }
17486 
17487     unsigned IdentityIndex = i * PartNumElem;
17488     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17489     // The extract index must be constant.
17490     if (!CS)
17491       return SDValue();
17492 
17493     // Check that we are reading from the identity index.
17494     if (CS->getZExtValue() != IdentityIndex)
17495       return SDValue();
17496   }
17497 
17498   if (SingleSource.getNode())
17499     return SingleSource;
17500 
17501   return SDValue();
17502 }
17503 
17504 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
17505                                               SelectionDAG &DAG) {
17506   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17507   SDValue BinOp = Extract->getOperand(0);
17508   if (!TLI.isBinOp(BinOp.getOpcode()) || BinOp.getNode()->getNumValues() != 1)
17509     return SDValue();
17510 
17511   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
17512   SDValue Index = Extract->getOperand(1);
17513   EVT VT = Extract->getValueType(0);
17514   bool IsInsert0 = Bop0.getOpcode() == ISD::INSERT_SUBVECTOR &&
17515                    Bop0.getOperand(1).getValueType() == VT &&
17516                    Bop0.getOperand(2) == Index;
17517   bool IsInsert1 = Bop1.getOpcode() == ISD::INSERT_SUBVECTOR &&
17518                    Bop1.getOperand(1).getValueType() == VT &&
17519                    Bop1.getOperand(2) == Index;
17520   // TODO: We could handle the case where only 1 operand is being inserted by
17521   //       creating an extract of the other operand, but that requires checking
17522   //       number of uses and/or costs.
17523   if (!IsInsert0 || !IsInsert1 ||
17524       !TLI.isOperationLegalOrCustom(BinOp.getOpcode(), VT))
17525     return SDValue();
17526 
17527   // We are inserting both operands of the wide binop only to extract back
17528   // to the narrow vector size. Eliminate all of the insert/extract:
17529   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
17530   return DAG.getNode(BinOp.getOpcode(), SDLoc(Extract), VT, Bop0.getOperand(1),
17531                      Bop1.getOperand(1), BinOp->getFlags());
17532 }
17533 
17534 /// If we are extracting a subvector produced by a wide binary operator try
17535 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17536 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17537   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17538   // some of these bailouts with other transforms.
17539 
17540   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
17541     return V;
17542 
17543   // The extract index must be a constant, so we can map it to a concat operand.
17544   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17545   if (!ExtractIndexC)
17546     return SDValue();
17547 
17548   // We are looking for an optionally bitcasted wide vector binary operator
17549   // feeding an extract subvector.
17550   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17551   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17552   if (!TLI.isBinOp(BinOp.getOpcode()) || BinOp.getNode()->getNumValues() != 1)
17553     return SDValue();
17554 
  // The binop must produce a vector type, so we can extract some fraction of
  // it.
17556   EVT WideBVT = BinOp.getValueType();
17557   if (!WideBVT.isVector())
17558     return SDValue();
17559 
17560   EVT VT = Extract->getValueType(0);
17561   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17562   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17563          "Extract index is not a multiple of the vector length.");
17564 
17565   // Bail out if this is not a proper multiple width extraction.
17566   unsigned WideWidth = WideBVT.getSizeInBits();
17567   unsigned NarrowWidth = VT.getSizeInBits();
17568   if (WideWidth % NarrowWidth != 0)
17569     return SDValue();
17570 
17571   // Bail out if we are extracting a fraction of a single operation. This can
17572   // occur because we potentially looked through a bitcast of the binop.
17573   unsigned NarrowingRatio = WideWidth / NarrowWidth;
17574   unsigned WideNumElts = WideBVT.getVectorNumElements();
17575   if (WideNumElts % NarrowingRatio != 0)
17576     return SDValue();
17577 
17578   // Bail out if the target does not support a narrower version of the binop.
17579   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17580                                    WideNumElts / NarrowingRatio);
17581   unsigned BOpcode = BinOp.getOpcode();
17582   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17583     return SDValue();
17584 
17585   // If extraction is cheap, we don't need to look at the binop operands
17586   // for concat ops. The narrow binop alone makes this transform profitable.
17587   // We can't just reuse the original extract index operand because we may have
17588   // bitcasted.
17589   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17590   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17591   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17592   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17593       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17594     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17595     SDLoc DL(Extract);
17596     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17597     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17598                             BinOp.getOperand(0), NewExtIndex);
17599     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17600                             BinOp.getOperand(1), NewExtIndex);
17601     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17602                                       BinOp.getNode()->getFlags());
17603     return DAG.getBitcast(VT, NarrowBinOp);
17604   }
17605 
17606   // Only handle the case where we are doubling and then halving. A larger ratio
17607   // may require more than two narrow binops to replace the wide binop.
17608   if (NarrowingRatio != 2)
17609     return SDValue();
17610 
17611   // TODO: The motivating case for this transform is an x86 AVX1 target. That
17612   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17613   // flavors, but no other 256-bit integer support. This could be extended to
17614   // handle any binop, but that may require fixing/adding other folds to avoid
17615   // codegen regressions.
17616   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17617     return SDValue();
17618 
17619   // We need at least one concatenation operation of a binop operand to make
17620   // this transform worthwhile. The concat must double the input vector sizes.
17621   SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
17622   SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
17623   bool ConcatL =
17624       LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17625   bool ConcatR =
17626       RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17627   if (ConcatL || ConcatR) {
17628     // If a binop operand was not the result of a concat, we must extract a
17629     // half-sized operand for our new narrow binop:
17630     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17631     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
17632     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
17633     SDLoc DL(Extract);
17634     SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17635     SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17636                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17637                                       BinOp.getOperand(0), IndexC);
17638 
17639     SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17640                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17641                                       BinOp.getOperand(1), IndexC);
17642 
17643     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17644     return DAG.getBitcast(VT, NarrowBinOp);
17645   }
17646 
17647   return SDValue();
17648 }
17649 
17650 /// If we are extracting a subvector from a wide vector load, convert to a
17651 /// narrow load to eliminate the extraction:
17652 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17653 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17654   // TODO: Add support for big-endian. The offset calculation must be adjusted.
17655   if (DAG.getDataLayout().isBigEndian())
17656     return SDValue();
17657 
17658   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17659   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17660   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17661     return SDValue();
17662 
17663   // Allow targets to opt-out.
17664   EVT VT = Extract->getValueType(0);
17665   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17666   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17667     return SDValue();
17668 
17669   // The narrow load will be offset from the base address of the old load if
17670   // we are extracting from something besides index 0 (little-endian).
17671   SDLoc DL(Extract);
17672   SDValue BaseAddr = Ld->getOperand(1);
17673   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
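  // For example (illustrative): extracting a v4i32 subvector at index 4 from
  // a v8i32 load gives Offset = 4 * 4 = 16 bytes.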
17674 
17675   // TODO: Use "BaseIndexOffset" to make this more effective.
17676   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17677   MachineFunction &MF = DAG.getMachineFunction();
17678   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17679                                                    VT.getStoreSize());
17680   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17681   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17682   return NewLd;
17683 }
17684 
17685 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
17686   EVT NVT = N->getValueType(0);
17687   SDValue V = N->getOperand(0);
17688 
17689   // Extract from UNDEF is UNDEF.
17690   if (V.isUndef())
17691     return DAG.getUNDEF(NVT);
17692 
17693   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17694     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17695       return NarrowLoad;
17696 
17697   // Combine an extract of an extract into a single extract_subvector.
17698   // ext (ext X, C), 0 --> ext X, C
17699   SDValue Index = N->getOperand(1);
17700   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
17701       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
17702     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
17703                                     V.getConstantOperandVal(1)) &&
17704         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
17705       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
17706                          V.getOperand(1));
17707     }
17708   }
17709 
17710   // Try to move vector bitcast after extract_subv by scaling extraction index:
17711   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
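  // For example (a sketch with illustrative types):
  //   (extract_subv (v4i64 (bitcast (v8i32 X))), 2)
  //     --> (v2i64 (bitcast (v4i32 (extract_subv X, 4))))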
17712   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
17713       V.getOperand(0).getValueType().isVector()) {
17714     SDValue SrcOp = V.getOperand(0);
17715     EVT SrcVT = SrcOp.getValueType();
17716     unsigned SrcNumElts = SrcVT.getVectorNumElements();
17717     unsigned DestNumElts = V.getValueType().getVectorNumElements();
17718     if ((SrcNumElts % DestNumElts) == 0) {
17719       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
17720       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
17721       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
17722                                       NewExtNumElts);
17723       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
17724         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
17725         SDLoc DL(N);
17726         SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
17727         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
17728                                          V.getOperand(0), NewIndex);
17729         return DAG.getBitcast(NVT, NewExtract);
17730       }
17731     }
17732   }
17733 
17734   // Combine:
17735   //    (extract_subvec (concat V1, V2, ...), i)
17736   // Into:
17737   //    Vi if possible
  // Only operand 0 is checked, as 'concat' assumes all its inputs have the
  // same type.
17740   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
17741       V.getOperand(0).getValueType() == NVT) {
17742     unsigned Idx = N->getConstantOperandVal(1);
17743     unsigned NumElems = NVT.getVectorNumElements();
17744     assert((Idx % NumElems) == 0 &&
17745            "IDX in concat is not a multiple of the result vector length.");
17746     return V->getOperand(Idx / NumElems);
17747   }
17748 
17749   V = peekThroughBitcasts(V);
17750 
  // If the input is a build vector, try to make a smaller build vector.
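  // For example (illustrative):
  //   (extract_subvector (v4i32 (build_vector a, b, c, d)), 2)
  //     --> (v2i32 (build_vector c, d))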
17752   if (V.getOpcode() == ISD::BUILD_VECTOR) {
17753     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
17754       EVT InVT = V.getValueType();
17755       unsigned ExtractSize = NVT.getSizeInBits();
17756       unsigned EltSize = InVT.getScalarSizeInBits();
17757       // Only do this if we won't split any elements.
17758       if (ExtractSize % EltSize == 0) {
17759         unsigned NumElems = ExtractSize / EltSize;
17760         EVT EltVT = InVT.getVectorElementType();
17761         EVT ExtractVT = NumElems == 1 ? EltVT
17762                                       : EVT::getVectorVT(*DAG.getContext(),
17763                                                          EltVT, NumElems);
17764         if ((Level < AfterLegalizeDAG ||
17765              (NumElems == 1 ||
17766               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17767             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17768           unsigned IdxVal = IdxC->getZExtValue();
17769           IdxVal *= NVT.getScalarSizeInBits();
17770           IdxVal /= EltSize;
17771 
17772           if (NumElems == 1) {
17773             SDValue Src = V->getOperand(IdxVal);
17774             if (EltVT != Src.getValueType())
              Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
17776             return DAG.getBitcast(NVT, Src);
17777           }
17778 
17779           // Extract the pieces from the original build_vector.
17780           SDValue BuildVec = DAG.getBuildVector(
17781               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
17782           return DAG.getBitcast(NVT, BuildVec);
17783         }
17784       }
17785     }
17786   }
17787 
17788   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are of the same size.
17791     EVT SmallVT = V.getOperand(1).getValueType();
17792     if (!NVT.bitsEq(SmallVT))
17793       return SDValue();
17794 
17795     // Only handle cases where both indexes are constants.
17796     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
17797     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17798     if (InsIdx && ExtIdx) {
17799       // Combine:
17800       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17801       // Into:
17802       //    indices are equal or bit offsets are equal => V1
17803       //    otherwise => (extract_subvec V1, ExtIdx)
17804       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17805           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17806         return DAG.getBitcast(NVT, V.getOperand(1));
17807       return DAG.getNode(
17808           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17809           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17810           Index);
17811     }
17812   }
17813 
17814   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17815     return NarrowBOp;
17816 
17817   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17818     return SDValue(N, 0);
17819 
17820   return SDValue();
17821 }
17822 
17823 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
17824 /// followed by concatenation. Narrow vector ops may have better performance
17825 /// than wide ops, and this can unlock further narrowing of other vector ops.
17826 /// Targets can invert this transform later if it is not profitable.
17827 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
17828                                          SelectionDAG &DAG) {
17829   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
17830   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
17831       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
17832       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
17833     return SDValue();
17834 
17835   // Split the wide shuffle mask into halves. Any mask element that is accessing
17836   // operand 1 is offset down to account for narrowing of the vectors.
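  // For example (illustrative): a v8i32 mask <0,1,8,9,2,3,10,11> splits into
  // the v4i32 half-masks <0,1,4,5> and <2,3,6,7>.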
17837   ArrayRef<int> Mask = Shuf->getMask();
17838   EVT VT = Shuf->getValueType(0);
17839   unsigned NumElts = VT.getVectorNumElements();
17840   unsigned HalfNumElts = NumElts / 2;
17841   SmallVector<int, 16> Mask0(HalfNumElts, -1);
17842   SmallVector<int, 16> Mask1(HalfNumElts, -1);
17843   for (unsigned i = 0; i != NumElts; ++i) {
17844     if (Mask[i] == -1)
17845       continue;
17846     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
17847     if (i < HalfNumElts)
17848       Mask0[i] = M;
17849     else
17850       Mask1[i - HalfNumElts] = M;
17851   }
17852 
17853   // Ask the target if this is a valid transform.
17854   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17855   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
17856                                 HalfNumElts);
17857   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
17858       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
17859     return SDValue();
17860 
17861   // shuffle (concat X, undef), (concat Y, undef), Mask -->
17862   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
17863   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
17864   SDLoc DL(Shuf);
17865   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
17866   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
17867   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
17868 }
17869 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
17872 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17873   EVT VT = N->getValueType(0);
17874   unsigned NumElts = VT.getVectorNumElements();
17875 
17876   SDValue N0 = N->getOperand(0);
17877   SDValue N1 = N->getOperand(1);
17878   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17879   ArrayRef<int> Mask = SVN->getMask();
17880 
17881   SmallVector<SDValue, 4> Ops;
17882   EVT ConcatVT = N0.getOperand(0).getValueType();
17883   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17884   unsigned NumConcats = NumElts / NumElemsPerConcat;
17885 
17886   auto IsUndefMaskElt = [](int i) { return i == -1; };
17887 
17888   // Special case: shuffle(concat(A,B)) can be more efficiently represented
17889   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17890   // half vector elements.
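  // For example (illustrative): with VT = v4i32 and A, B = v2i32:
  //   shuffle (concat A, B), undef, <1,2,u,u>
  //     --> concat (shuffle A, B, <1,2>), undef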
17891   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17892       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
17893                    IsUndefMaskElt)) {
17894     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
17895                               N0.getOperand(1),
17896                               Mask.slice(0, NumElemsPerConcat));
17897     N1 = DAG.getUNDEF(ConcatVT);
17898     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17899   }
17900 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
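  // For example (illustrative): with A, B, C, D = v4i16:
  //   shuffle (concat A, B), (concat C, D), <4,5,6,7,12,13,14,15>
  //     --> concat B, D
  // because each submask is an exact copy of a single source subvector.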
17903   for (unsigned I = 0; I != NumConcats; ++I) {
17904     unsigned Begin = I * NumElemsPerConcat;
17905     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
17906 
17907     // Make sure we're dealing with a copy.
17908     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
17909       Ops.push_back(DAG.getUNDEF(ConcatVT));
17910       continue;
17911     }
17912 
17913     int OpIdx = -1;
17914     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
17915       if (IsUndefMaskElt(SubMask[i]))
17916         continue;
17917       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
17918         return SDValue();
17919       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
17920       if (0 <= OpIdx && EltOpIdx != OpIdx)
17921         return SDValue();
17922       OpIdx = EltOpIdx;
17923     }
17924     assert(0 <= OpIdx && "Unknown concat_vectors op");
17925 
17926     if (OpIdx < (int)N0.getNumOperands())
17927       Ops.push_back(N0.getOperand(OpIdx));
17928     else
17929       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
17930   }
17931 
17932   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17933 }
17934 
17935 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17936 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17937 //
17938 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17939 // a simplification in some sense, but it isn't appropriate in general: some
17940 // BUILD_VECTORs are substantially cheaper than others. The general case
17941 // of a BUILD_VECTOR requires inserting each element individually (or
17942 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17943 // all constants is a single constant pool load.  A BUILD_VECTOR where each
17944 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
17945 // are undef lowers to a small number of element insertions.
17946 //
17947 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17948 // We don't fold shuffles where one side is a non-zero constant, and we don't
17949 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17950 // non-constant operands. This seems to work out reasonably well in practice.
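// For example (illustrative, and still subject to the one-use and constant
// heuristics below):
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,4,1,5>
//     --> build_vector a,e,b,f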
17951 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17952                                        SelectionDAG &DAG,
17953                                        const TargetLowering &TLI) {
17954   EVT VT = SVN->getValueType(0);
17955   unsigned NumElts = VT.getVectorNumElements();
17956   SDValue N0 = SVN->getOperand(0);
17957   SDValue N1 = SVN->getOperand(1);
17958 
17959   if (!N0->hasOneUse())
17960     return SDValue();
17961 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
17964   if (!N1.isUndef()) {
17965     if (!N1->hasOneUse())
17966       return SDValue();
17967 
17968     bool N0AnyConst = isAnyConstantBuildVector(N0);
17969     bool N1AnyConst = isAnyConstantBuildVector(N1);
17970     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17971       return SDValue();
17972     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17973       return SDValue();
17974   }
17975 
17976   // If both inputs are splats of the same value then we can safely merge this
17977   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17978   bool IsSplat = false;
17979   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17980   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17981   if (BV0 && BV1)
17982     if (SDValue Splat0 = BV0->getSplatValue())
17983       IsSplat = (Splat0 == BV1->getSplatValue());
17984 
17985   SmallVector<SDValue, 8> Ops;
17986   SmallSet<SDValue, 16> DuplicateOps;
17987   for (int M : SVN->getMask()) {
17988     SDValue Op = DAG.getUNDEF(VT.getScalarType());
17989     if (M >= 0) {
17990       int Idx = M < (int)NumElts ? M : M - NumElts;
17991       SDValue &S = (M < (int)NumElts ? N0 : N1);
17992       if (S.getOpcode() == ISD::BUILD_VECTOR) {
17993         Op = S.getOperand(Idx);
17994       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17995         SDValue Op0 = S.getOperand(0);
17996         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
17997       } else {
17998         // Operand can't be combined - bail out.
17999         return SDValue();
18000       }
18001     }
18002 
18003     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18004     // generating a splat; semantically, this is fine, but it's likely to
18005     // generate low-quality code if the target can't reconstruct an appropriate
18006     // shuffle.
18007     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18008       if (!IsSplat && !DuplicateOps.insert(Op).second)
18009         return SDValue();
18010 
18011     Ops.push_back(Op);
18012   }
18013 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
  // maximum type and extend them all.
18016   EVT SVT = VT.getScalarType();
18017   if (SVT.isInteger())
18018     for (SDValue &Op : Ops)
18019       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18020   if (SVT != VT.getScalarType())
18021     for (SDValue &Op : Ops)
18022       Op = TLI.isZExtFree(Op.getValueType(), SVT)
18023                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18024                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18025   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18026 }
18027 
18028 // Match shuffles that can be converted to any_vector_extend_in_reg.
18029 // This is often generated during legalization.
18030 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18031 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18032 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
18033                                             SelectionDAG &DAG,
18034                                             const TargetLowering &TLI,
18035                                             bool LegalOperations) {
18036   EVT VT = SVN->getValueType(0);
18037   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18038 
18039   // TODO Add support for big-endian when we have a test case.
18040   if (!VT.isInteger() || IsBigEndian)
18041     return SDValue();
18042 
18043   unsigned NumElts = VT.getVectorNumElements();
18044   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18045   ArrayRef<int> Mask = SVN->getMask();
18046   SDValue N0 = SVN->getOperand(0);
18047 
18048   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18049   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18050     for (unsigned i = 0; i != NumElts; ++i) {
18051       if (Mask[i] < 0)
18052         continue;
18053       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18054         continue;
18055       return false;
18056     }
18057     return true;
18058   };
18059 
  // Attempt to match a '*_extend_vector_inreg' shuffle. We just search for
  // power-of-2 extensions as they are the most likely.
18062   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Skip scales that do not evenly divide the element count.
18064     if (NumElts % Scale != 0)
18065       continue;
18066     if (!isAnyExtend(Scale))
18067       continue;
18068 
18069     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18070     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18071     // Never create an illegal type. Only create unsupported operations if we
18072     // are pre-legalization.
18073     if (TLI.isTypeLegal(OutVT))
18074       if (!LegalOperations ||
18075           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
18076         return DAG.getBitcast(VT,
18077                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
18078                                           SDLoc(SVN), OutVT, N0));
18079   }
18080 
18081   return SDValue();
18082 }
18083 
18084 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18085 // each source element of a large type into the lowest elements of a smaller
18086 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we
// should be able to remove it.
18089 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
18090                                         SelectionDAG &DAG) {
18091   EVT VT = SVN->getValueType(0);
18092   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18093 
18094   // TODO Add support for big-endian when we have a test case.
18095   if (!VT.isInteger() || IsBigEndian)
18096     return SDValue();
18097 
18098   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
18099 
18100   unsigned Opcode = N0.getOpcode();
18101   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18102       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
18103       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
18104     return SDValue();
18105 
18106   SDValue N00 = N0.getOperand(0);
18107   ArrayRef<int> Mask = SVN->getMask();
18108   unsigned NumElts = VT.getVectorNumElements();
18109   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18110   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18111   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18112 
18113   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18114     return SDValue();
18115   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18116 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
18118   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18119   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18120   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18121     for (unsigned i = 0; i != NumElts; ++i) {
18122       if (Mask[i] < 0)
18123         continue;
18124       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18125         continue;
18126       return false;
18127     }
18128     return true;
18129   };
18130 
18131   // At the moment we just handle the case where we've truncated back to the
18132   // same size as before the extension.
18133   // TODO: handle more extension/truncation cases as cases arise.
18134   if (EltSizeInBits != ExtSrcSizeInBits)
18135     return SDValue();
18136 
18137   // We can remove *extend_vector_inreg only if the truncation happens at
18138   // the same scale as the extension.
18139   if (isTruncate(ExtScale))
18140     return DAG.getBitcast(VT, N00);
18141 
18142   return SDValue();
18143 }
18144 
18145 // Combine shuffles of splat-shuffles of the form:
18146 // shuffle (shuffle V, undef, splat-mask), undef, M
18147 // If splat-mask contains undef elements, we need to be careful about
// introducing undefs in the folded mask which are not the result of composing
18149 // the masks of the shuffles.
18150 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18151                                         SelectionDAG &DAG) {
18152   if (!Shuf->getOperand(1).isUndef())
18153     return SDValue();
18154   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18155   if (!Splat || !Splat->isSplat())
18156     return SDValue();
18157 
18158   ArrayRef<int> ShufMask = Shuf->getMask();
18159   ArrayRef<int> SplatMask = Splat->getMask();
18160   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18161 
18162   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18163   // every undef mask element in the splat-shuffle has a corresponding undef
18164   // element in the user-shuffle's mask or if the composition of mask elements
18165   // would result in undef.
18166   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18167   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because
  //   we may be exposing to the users of the shuffle an undef element at
  //   index 1 which was not there before the combine.
18171   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18172   //   In this case the composition of masks yields SplatMask, so it's ok to
18173   //   simplify to the splat-shuffle.
18174   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18175   //   In this case the composed mask includes all undef elements of SplatMask
18176   //   and in addition sets element zero to undef. It is safe to simplify to
18177   //   the splat-shuffle.
18178   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18179                                        ArrayRef<int> SplatMask) {
18180     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18181       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18182           SplatMask[UserMask[i]] != -1)
18183         return false;
18184     return true;
18185   };
18186   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18187     return Shuf->getOperand(0);
18188 
18189   // Create a new shuffle with a mask that is composed of the two shuffles'
18190   // masks.
18191   SmallVector<int, 32> NewMask;
18192   for (int Idx : ShufMask)
18193     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18194 
18195   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18196                               Splat->getOperand(0), Splat->getOperand(1),
18197                               NewMask);
18198 }
18199 
18200 /// If the shuffle mask is taking exactly one element from the first vector
18201 /// operand and passing through all other elements from the second vector
18202 /// operand, return the index of the mask element that is choosing an element
18203 /// from the first operand. Otherwise, return -1.
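/// For example (illustrative): for Mask = <4,0,6,7> with MaskSize = 4, only
/// element 1 comes from operand 0 (Mask[1] = 0) while every other element
/// passes through from operand 1 in place, so this returns 1.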
18204 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18205   int MaskSize = Mask.size();
18206   int EltFromOp0 = -1;
18207   // TODO: This does not match if there are undef elements in the shuffle mask.
18208   // Should we ignore undefs in the shuffle mask instead? The trade-off is
18209   // removing an instruction (a shuffle), but losing the knowledge that some
18210   // vector lanes are not needed.
18211   for (int i = 0; i != MaskSize; ++i) {
18212     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18213       // We're looking for a shuffle of exactly one element from operand 0.
18214       if (EltFromOp0 != -1)
18215         return -1;
18216       EltFromOp0 = i;
18217     } else if (Mask[i] != i + MaskSize) {
18218       // Nothing from operand 1 can change lanes.
18219       return -1;
18220     }
18221   }
18222   return EltFromOp0;
18223 }
18224 
18225 /// If a shuffle inserts exactly one element from a source vector operand into
18226 /// another vector operand and we can access the specified element as a scalar,
18227 /// then we can eliminate the shuffle.
18228 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18229                                       SelectionDAG &DAG) {
18230   // First, check if we are taking one element of a vector and shuffling that
18231   // element into another vector.
18232   ArrayRef<int> Mask = Shuf->getMask();
18233   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18234   SDValue Op0 = Shuf->getOperand(0);
18235   SDValue Op1 = Shuf->getOperand(1);
18236   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18237   if (ShufOp0Index == -1) {
18238     // Commute mask and check again.
18239     ShuffleVectorSDNode::commuteMask(CommutedMask);
18240     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18241     if (ShufOp0Index == -1)
18242       return SDValue();
18243     // Commute operands to match the commuted shuffle mask.
18244     std::swap(Op0, Op1);
18245     Mask = CommutedMask;
18246   }
18247 
18248   // The shuffle inserts exactly one element from operand 0 into operand 1.
18249   // Now see if we can access that element as a scalar via a real insert element
18250   // instruction.
18251   // TODO: We can try harder to locate the element as a scalar. Examples: it
18252   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18253   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18254          "Shuffle mask value must be from operand 0");
18255   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18256     return SDValue();
18257 
18258   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18259   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18260     return SDValue();
18261 
18262   // There's an existing insertelement with constant insertion index, so we
18263   // don't need to check the legality/profitability of a replacement operation
18264   // that differs at most in the constant value. The target should be able to
18265   // lower any of those in a similar way. If not, legalization will expand this
18266   // to a scalar-to-vector plus shuffle.
18267   //
18268   // Note that the shuffle may move the scalar from the position that the insert
18269   // element used. Therefore, our new insert element occurs at the shuffle's
18270   // mask index value, not the insert's index value.
18271   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
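  // For example (illustrative):
  //   shuffle (insertelt v1, x, 0), v2, <4,0,6,7> --> insertelt v2, x, 1
  // because mask element 1 selects lane 0 of operand 0, which holds x.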
18272   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18273                                         Op0.getOperand(2).getValueType());
18274   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18275                      Op1, Op0.getOperand(1), NewInsIndex);
18276 }
18277 
18278 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18279 /// completely. This has the potential to lose undef knowledge because the first
18280 /// shuffle may not have an undef mask element where the second one does. So
18281 /// only call this after doing simplifications based on demanded elements.
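/// For example (illustrative): with inner mask <0,0,3,3> and outer mask
/// <1,0,3,2>, every lane of the outer shuffle selects a lane of the inner
/// shuffle holding the same source element, so the outer shuffle is a no-op.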
18282 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18283   // shuf (shuf0 X, Y, Mask0), undef, Mask
18284   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18285   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18286     return SDValue();
18287 
18288   ArrayRef<int> Mask = Shuf->getMask();
18289   ArrayRef<int> Mask0 = Shuf0->getMask();
18290   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18291     // Ignore undef elements.
18292     if (Mask[i] == -1)
18293       continue;
18294     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18295 
18296     // Is the element of the shuffle operand chosen by this shuffle the same as
18297     // the element chosen by the shuffle operand itself?
18298     if (Mask0[Mask[i]] != Mask0[i])
18299       return SDValue();
18300   }
18301   // Every element of this shuffle is identical to the result of the previous
18302   // shuffle, so we can replace this value.
18303   return Shuf->getOperand(0);
18304 }
18305 
18306 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18307   EVT VT = N->getValueType(0);
18308   unsigned NumElts = VT.getVectorNumElements();
18309 
18310   SDValue N0 = N->getOperand(0);
18311   SDValue N1 = N->getOperand(1);
18312 
18313   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18314 
18315   // Canonicalize shuffle undef, undef -> undef
18316   if (N0.isUndef() && N1.isUndef())
18317     return DAG.getUNDEF(VT);
18318 
18319   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18320 
18321   // Canonicalize shuffle v, v -> v, undef
18322   if (N0 == N1) {
18323     SmallVector<int, 8> NewMask;
18324     for (unsigned i = 0; i != NumElts; ++i) {
18325       int Idx = SVN->getMaskElt(i);
18326       if (Idx >= (int)NumElts) Idx -= NumElts;
18327       NewMask.push_back(Idx);
18328     }
18329     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18330   }
18331 
18332   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18333   if (N0.isUndef())
18334     return DAG.getCommutedVectorShuffle(*SVN);
18335 
18336   // Remove references to rhs if it is undef
18337   if (N1.isUndef()) {
18338     bool Changed = false;
18339     SmallVector<int, 8> NewMask;
18340     for (unsigned i = 0; i != NumElts; ++i) {
18341       int Idx = SVN->getMaskElt(i);
18342       if (Idx >= (int)NumElts) {
18343         Idx = -1;
18344         Changed = true;
18345       }
18346       NewMask.push_back(Idx);
18347     }
18348     if (Changed)
18349       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18350   }
18351 
18352   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18353     return InsElt;
18354 
18355   // A shuffle of a single vector that is a splatted value can always be folded.
18356   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18357     return V;
18358 
18359   // If it is a splat, check if the argument vector is another splat or a
18360   // build_vector.
18361   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18362     int SplatIndex = SVN->getSplatIndex();
18363     if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18364         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18365       // splat (vector_bo L, R), Index -->
18366       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
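      // For example (illustrative): splatting lane 0 of (add L, R) becomes a
      // scalar add of the lane-0 elements of L and R, rebroadcast via the
      // scalar_to_vector + all-zero shuffle mask built below.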
18367       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18368       SDLoc DL(N);
18369       EVT EltVT = VT.getScalarType();
18370       SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18371       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18372       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18373       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18374                                   N0.getNode()->getFlags());
18375       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18376       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18377       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18378     }
18379 
18380     // If this is a bit convert that changes the element type of the vector but
18381     // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
18383     SDNode *V = N0.getNode();
18384     if (V->getOpcode() == ISD::BITCAST) {
18385       SDValue ConvInput = V->getOperand(0);
18386       if (ConvInput.getValueType().isVector() &&
18387           ConvInput.getValueType().getVectorNumElements() == NumElts)
18388         V = ConvInput.getNode();
18389     }
18390 
18391     if (V->getOpcode() == ISD::BUILD_VECTOR) {
18392       assert(V->getNumOperands() == NumElts &&
18393              "BUILD_VECTOR has wrong number of operands");
18394       SDValue Base;
18395       bool AllSame = true;
18396       for (unsigned i = 0; i != NumElts; ++i) {
18397         if (!V->getOperand(i).isUndef()) {
18398           Base = V->getOperand(i);
18399           break;
18400         }
18401       }
18402       // Splat of <u, u, u, u>, return <u, u, u, u>
18403       if (!Base.getNode())
18404         return N0;
18405       for (unsigned i = 0; i != NumElts; ++i) {
18406         if (V->getOperand(i) != Base) {
18407           AllSame = false;
18408           break;
18409         }
18410       }
18411       // Splat of <x, x, x, x>, return <x, x, x, x>
18412       if (AllSame)
18413         return N0;
18414 
18415       // Canonicalize any other splat as a build_vector.
18416       SDValue Splatted = V->getOperand(SplatIndex);
18417       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18418       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18419 
18420       // We may have jumped through bitcasts, so the type of the
18421       // BUILD_VECTOR may not match the type of the shuffle.
18422       if (V->getValueType(0) != VT)
18423         NewBV = DAG.getBitcast(VT, NewBV);
18424       return NewBV;
18425     }
18426   }
18427 
18428   // Simplify source operands based on shuffle mask.
18429   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18430     return SDValue(N, 0);
18431 
18432   // This is intentionally placed after demanded elements simplification because
18433   // it could eliminate knowledge of undef elements created by this shuffle.
18434   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18435     return ShufOp;
18436 
18437   // Match shuffles that can be converted to any_vector_extend_in_reg.
18438   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18439     return V;
18440 
  // Combine 'truncate_vector_inreg' style shuffles.
18442   if (SDValue V = combineTruncationShuffle(SVN, DAG))
18443     return V;
18444 
18445   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18446       Level < AfterLegalizeVectorOps &&
18447       (N1.isUndef() ||
18448       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18449        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18450     if (SDValue V = partitionShuffleOfConcats(N, DAG))
18451       return V;
18452   }
18453 
18454   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18455   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18456   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18457     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18458       return Res;
18459 
18460   // If this shuffle only has a single input that is a bitcasted shuffle,
18461   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18462   // back to their original types.
18463   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18464       N1.isUndef() && Level < AfterLegalizeVectorOps &&
18465       TLI.isTypeLegal(VT)) {
18466     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18467       if (Scale == 1)
18468         return SmallVector<int, 8>(Mask.begin(), Mask.end());
18469 
18470       SmallVector<int, 8> NewMask;
18471       for (int M : Mask)
18472         for (int s = 0; s != Scale; ++s)
18473           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18474       return NewMask;
18475     };
18476 
18477     SDValue BC0 = peekThroughOneUseBitcasts(N0);
18478     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18479       EVT SVT = VT.getScalarType();
18480       EVT InnerVT = BC0->getValueType(0);
18481       EVT InnerSVT = InnerVT.getScalarType();
18482 
18483       // Determine which shuffle works with the smaller scalar type.
18484       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
18485       EVT ScaleSVT = ScaleVT.getScalarType();
18486 
18487       if (TLI.isTypeLegal(ScaleVT) &&
18488           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
18489           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
18490         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18491         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18492 
18493         // Scale the shuffle masks to the smaller scalar type.
18494         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
18495         SmallVector<int, 8> InnerMask =
18496             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
18497         SmallVector<int, 8> OuterMask =
18498             ScaleShuffleMask(SVN->getMask(), OuterScale);
18499 
18500         // Merge the shuffle masks.
18501         SmallVector<int, 8> NewMask;
18502         for (int M : OuterMask)
18503           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
18504 
18505         // Test for shuffle mask legality over both commutations.
18506         SDValue SV0 = BC0->getOperand(0);
18507         SDValue SV1 = BC0->getOperand(1);
18508         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18509         if (!LegalMask) {
18510           std::swap(SV0, SV1);
18511           ShuffleVectorSDNode::commuteMask(NewMask);
18512           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18513         }
18514 
18515         if (LegalMask) {
18516           SV0 = DAG.getBitcast(ScaleVT, SV0);
18517           SV1 = DAG.getBitcast(ScaleVT, SV1);
18518           return DAG.getBitcast(
18519               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
18520         }
18521       }
18522     }
18523   }
18524 
18525   // Canonicalize shuffles according to rules:
18526   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
18527   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
18528   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
18529   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
18530       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
18531       TLI.isTypeLegal(VT)) {
18532     // The incoming shuffle must be of the same type as the result of the
18533     // current shuffle.
18534     assert(N1->getOperand(0).getValueType() == VT &&
18535            "Shuffle types don't match");
18536 
18537     SDValue SV0 = N1->getOperand(0);
18538     SDValue SV1 = N1->getOperand(1);
18539     bool HasSameOp0 = N0 == SV0;
18540     bool IsSV1Undef = SV1.isUndef();
18541     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18542       // Commute the operands of this shuffle so that next rule
18543       // will trigger.
18544       return DAG.getCommutedVectorShuffle(*SVN);
18545   }
18546 
18547   // Try to fold according to rules:
18548   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18549   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18550   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18551   // Don't try to fold shuffles with illegal type.
18552   // Only fold if this shuffle is the only user of the other shuffle.
18553   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18554       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18555     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18556 
18557     // Don't try to fold splats; they're likely to simplify somehow, or they
18558     // might be free.
18559     if (OtherSV->isSplat())
18560       return SDValue();
18561 
18562     // The incoming shuffle must be of the same type as the result of the
18563     // current shuffle.
18564     assert(OtherSV->getOperand(0).getValueType() == VT &&
18565            "Shuffle types don't match");
18566 
18567     SDValue SV0, SV1;
18568     SmallVector<int, 4> Mask;
18569     // Compute the combined shuffle mask for a shuffle with SV0 as the first
18570     // operand, and SV1 as the second operand.
18571     for (unsigned i = 0; i != NumElts; ++i) {
18572       int Idx = SVN->getMaskElt(i);
18573       if (Idx < 0) {
18574         // Propagate Undef.
18575         Mask.push_back(Idx);
18576         continue;
18577       }
18578 
18579       SDValue CurrentVec;
18580       if (Idx < (int)NumElts) {
18581         // This shuffle index refers to the inner shuffle N0. Lookup the inner
18582         // shuffle mask to identify which vector is actually referenced.
18583         Idx = OtherSV->getMaskElt(Idx);
18584         if (Idx < 0) {
18585           // Propagate Undef.
18586           Mask.push_back(Idx);
18587           continue;
18588         }
18589 
18590         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18591                                            : OtherSV->getOperand(1);
18592       } else {
18593         // This shuffle index references an element within N1.
18594         CurrentVec = N1;
18595       }
18596 
18597       // Simple case where 'CurrentVec' is UNDEF.
18598       if (CurrentVec.isUndef()) {
18599         Mask.push_back(-1);
18600         continue;
18601       }
18602 
18603       // Canonicalize the shuffle index. We don't know yet if CurrentVec
18604       // will be the first or second operand of the combined shuffle.
18605       Idx = Idx % NumElts;
18606       if (!SV0.getNode() || SV0 == CurrentVec) {
18607         // Ok. CurrentVec is the left hand side.
18608         // Update the mask accordingly.
18609         SV0 = CurrentVec;
18610         Mask.push_back(Idx);
18611         continue;
18612       }
18613 
18614       // Bail out if we cannot convert the shuffle pair into a single shuffle.
18615       if (SV1.getNode() && SV1 != CurrentVec)
18616         return SDValue();
18617 
18618       // Ok. CurrentVec is the right hand side.
18619       // Update the mask accordingly.
18620       SV1 = CurrentVec;
18621       Mask.push_back(Idx + NumElts);
18622     }
18623 
    // Check if all indices in Mask are Undef. If so, propagate Undef.
18625     bool isUndefMask = true;
18626     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
18627       isUndefMask &= Mask[i] < 0;
18628 
18629     if (isUndefMask)
18630       return DAG.getUNDEF(VT);
18631 
18632     if (!SV0.getNode())
18633       SV0 = DAG.getUNDEF(VT);
18634     if (!SV1.getNode())
18635       SV1 = DAG.getUNDEF(VT);
18636 
18637     // Avoid introducing shuffles with illegal mask.
18638     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
18639       ShuffleVectorSDNode::commuteMask(Mask);
18640 
18641       if (!TLI.isShuffleMaskLegal(Mask, VT))
18642         return SDValue();
18643 
18644       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
18645       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
18646       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
18647       std::swap(SV0, SV1);
18648     }
18649 
18650     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18651     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18652     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18653     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
18654   }
18655 
18656   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
18657     return V;
18658 
18659   return SDValue();
18660 }
18661 
18662 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
18663   SDValue InVal = N->getOperand(0);
18664   EVT VT = N->getValueType(0);
18665 
18666   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
18667   // with a VECTOR_SHUFFLE and possible truncate.
18668   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18669     SDValue InVec = InVal->getOperand(0);
18670     SDValue EltNo = InVal->getOperand(1);
18671     auto InVecT = InVec.getValueType();
18672     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
18673       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
18674       int Elt = C0->getZExtValue();
18675       NewMask[0] = Elt;
18676       SDValue Val;
      // If we have an implicit truncate, do the truncate here as long as
      // it's legal; if it's not legal, leave the pattern alone.
18679       if (VT.getScalarType() != InVal.getValueType() &&
18680           InVal.getValueType().isScalarInteger() &&
18681           isTypeLegal(VT.getScalarType())) {
18682         Val =
18683             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
18684         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
18685       }
18686       if (VT.getScalarType() == InVecT.getScalarType() &&
18687           VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
18688           TLI.isShuffleMaskLegal(NewMask, VT)) {
18689         Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
18690                                    DAG.getUNDEF(InVecT), NewMask);
        // If the initial vector is the correct size, this shuffle is a
        // valid result.
18693         if (VT == InVecT)
18694           return Val;
        // If not, we must truncate the vector.
18696         if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
18697           MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
18698           SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
18699           EVT SubVT =
18700               EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
18701                                VT.getVectorNumElements());
18702           Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
18703                             ZeroIdx);
18704           return Val;
18705         }
18706       }
18707     }
18708   }
18709 
18710   return SDValue();
18711 }
18712 
18713 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
18714   EVT VT = N->getValueType(0);
18715   SDValue N0 = N->getOperand(0);
18716   SDValue N1 = N->getOperand(1);
18717   SDValue N2 = N->getOperand(2);
18718 
18719   // If inserting an UNDEF, just return the original vector.
18720   if (N1.isUndef())
18721     return N0;
18722 
18723   // If this is an insert of an extracted vector into an undef vector, we can
18724   // just use the input to the extract.
18725   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18726       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
18727     return N1.getOperand(0);
18728 
  // If we are inserting a bitcast value into an undef, with the same
  // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X, N2)) N2 ->
  //        BITCAST X
18733   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
18734       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18735       N1.getOperand(0).getOperand(1) == N2 &&
18736       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
18737           VT.getVectorNumElements() &&
18738       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
18739           VT.getSizeInBits()) {
18740     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
18741   }
18742 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
18745   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
18746   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
18747   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
18748     SDValue CN0 = N0.getOperand(0);
18749     SDValue CN1 = N1.getOperand(0);
18750     EVT CN0VT = CN0.getValueType();
18751     EVT CN1VT = CN1.getValueType();
18752     if (CN0VT.isVector() && CN1VT.isVector() &&
18753         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
18754         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
18755       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
18756                                       CN0.getValueType(), CN0, CN1, N2);
18757       return DAG.getBitcast(VT, NewINSERT);
18758     }
18759   }
18760 
18761   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
18762   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
18763   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
18764   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
18765       N0.getOperand(1).getValueType() == N1.getValueType() &&
18766       N0.getOperand(2) == N2)
18767     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18768                        N1, N2);
18769 
18770   // Eliminate an intermediate insert into an undef vector:
18771   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18772   // insert_subvector undef, X, N2
18773   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18774       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18775     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18776                        N1.getOperand(1), N2);
18777 
18778   if (!isa<ConstantSDNode>(N2))
18779     return SDValue();
18780 
18781   unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18782 
18783   // Push subvector bitcasts to the output, adjusting the index as we go.
18784   // insert_subvector(bitcast(v), bitcast(s), c1)
18785   // -> bitcast(insert_subvector(v, s, c2))
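  // For example (illustrative): with VT = v4i32 and a v4i16 subvector source,
  //   insert_subvector (bitcast X), (bitcast S:v4i16), 2
  //     --> bitcast (insert_subvector X':v8i16, S, 4)
  // where the index scales by 32/16 = 2 and X' is X viewed as v8i16.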
18786   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
18787       N1.getOpcode() == ISD::BITCAST) {
18788     SDValue N0Src = peekThroughBitcasts(N0);
18789     SDValue N1Src = peekThroughBitcasts(N1);
18790     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
18791     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
18792     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
18793         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
18794       EVT NewVT;
18795       SDLoc DL(N);
18796       SDValue NewIdx;
18797       MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
18798       LLVMContext &Ctx = *DAG.getContext();
18799       unsigned NumElts = VT.getVectorNumElements();
18800       unsigned EltSizeInBits = VT.getScalarSizeInBits();
18801       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
18802         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
18803         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
18804         NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
18805       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
18806         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
18807         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
18808           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
18809           NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
18810         }
18811       }
18812       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
18813         SDValue Res = DAG.getBitcast(NewVT, N0Src);
18814         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
18815         return DAG.getBitcast(VT, Res);
18816       }
18817     }
18818   }
18819 
  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
  // when Idx1 < Idx0.
18824   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18825       N1.getValueType() == N0.getOperand(1).getValueType() &&
18826       isa<ConstantSDNode>(N0.getOperand(2))) {
18827     unsigned OtherIdx = N0.getConstantOperandVal(2);
18828     if (InsIdx < OtherIdx) {
18829       // Swap nodes.
18830       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18831                                   N0.getOperand(0), N1, N2);
18832       AddToWorklist(NewOp.getNode());
18833       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18834                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18835     }
18836   }
18837 
18838   // If the input vector is a concatenation, and the insert replaces
18839   // one of the pieces, we can optimize into a single concat_vectors.
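  // For example (illustrative): with A, B, and N1 all v4i32:
  //   insert_subvector (concat A, B), N1, 4 --> concat A, N1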
18840   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18841       N0.getOperand(0).getValueType() == N1.getValueType()) {
18842     unsigned Factor = N1.getValueType().getVectorNumElements();
18843 
18844     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18845     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18846 
18847     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18848   }
18849 
18850   // Simplify source operands based on insertion.
18851   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18852     return SDValue(N, 0);
18853 
18854   return SDValue();
18855 }
18856 
18857 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18858   SDValue N0 = N->getOperand(0);
18859 
18860   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18861   if (N0->getOpcode() == ISD::FP16_TO_FP)
18862     return N0->getOperand(0);
18863 
18864   return SDValue();
18865 }
18866 
18867 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18868   SDValue N0 = N->getOperand(0);
18869 
18870   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18871   if (N0->getOpcode() == ISD::AND) {
18872     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18873     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18874       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18875                          N0.getOperand(0));
18876     }
18877   }
18878 
18879   return SDValue();
18880 }
18881 
18882 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
18883   SDValue N0 = N->getOperand(0);
18884   EVT VT = N0.getValueType();
18885   unsigned Opcode = N->getOpcode();
18886 
18887   // VECREDUCE over 1-element vector is just an extract.
18888   if (VT.getVectorNumElements() == 1) {
18889     SDLoc dl(N);
18890     SDValue Res = DAG.getNode(
18891         ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
18892         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
18893     if (Res.getValueType() != N->getValueType(0))
18894       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
18895     return Res;
18896   }
18897 
  // On a boolean vector an and/or reduction is the same as a umin/umax
18899   // reduction. Convert them if the latter is legal while the former isn't.
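  // For example (illustrative): on a v4i32 input whose lanes are known to be
  // all-zeros or all-ones, VECREDUCE_AND yields all-ones iff every lane is
  // all-ones, which is exactly VECREDUCE_UMIN of the same lanes.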
18900   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
18901     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
18902         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
18903     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
18904         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
18905         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
18906       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
18907   }
18908 
18909   return SDValue();
18910 }
18911 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
18914 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
18915 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
18916 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18917   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18918 
18919   EVT VT = N->getValueType(0);
18920   SDValue LHS = N->getOperand(0);
18921   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18922   SDLoc DL(N);
18923 
18924   // Make sure we're not running after operation legalization where it
18925   // may have custom lowered the vector shuffles.
18926   if (LegalOperations)
18927     return SDValue();
18928 
18929   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18930     return SDValue();
18931 
18932   EVT RVT = RHS.getValueType();
18933   unsigned NumElts = RHS.getNumOperands();
18934 
  // Attempt to create a valid clear mask by splitting the mask into
  // sub-elements and checking whether each is all zeros or all ones,
  // making it suitable for shuffle masking.
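  // For example (illustrative, little-endian):
  //   AND v2i64 V, <0x00000000ffffffff, 0xffffffff00000000>
  // splits at Split = 2 into the v4i32 shuffle
  //   vector_shuffle (bitcast V), zero, <0,5,6,3>
  // where mask indices >= 4 select lanes of the zero vector.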
18938   auto BuildClearMask = [&](int Split) {
18939     int NumSubElts = NumElts * Split;
18940     int NumSubBits = RVT.getScalarSizeInBits() / Split;
18941 
18942     SmallVector<int, 8> Indices;
18943     for (int i = 0; i != NumSubElts; ++i) {
18944       int EltIdx = i / Split;
18945       int SubIdx = i % Split;
18946       SDValue Elt = RHS.getOperand(EltIdx);
18947       if (Elt.isUndef()) {
18948         Indices.push_back(-1);
18949         continue;
18950       }
18951 
18952       APInt Bits;
18953       if (isa<ConstantSDNode>(Elt))
18954         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18955       else if (isa<ConstantFPSDNode>(Elt))
18956         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18957       else
18958         return SDValue();
18959 
18960       // Extract the sub element from the constant bit mask.
18961       if (DAG.getDataLayout().isBigEndian()) {
18962         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18963       } else {
18964         Bits.lshrInPlace(SubIdx * NumSubBits);
18965       }
18966 
18967       if (Split > 1)
18968         Bits = Bits.trunc(NumSubBits);
18969 
18970       if (Bits.isAllOnesValue())
18971         Indices.push_back(i);
18972       else if (Bits == 0)
18973         Indices.push_back(i + NumSubElts);
18974       else
18975         return SDValue();
18976     }
18977 
18978     // Let's see if the target supports this vector_shuffle.
18979     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18980     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18981     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18982       return SDValue();
18983 
18984     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18985     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18986                                                    DAG.getBitcast(ClearVT, LHS),
18987                                                    Zero, Indices));
18988   };
18989 
18990   // Determine maximum split level (byte level masking).
18991   int MaxSplit = 1;
18992   if (RVT.getScalarSizeInBits() % 8 == 0)
18993     MaxSplit = RVT.getScalarSizeInBits() / 8;
18994 
18995   for (int Split = 1; Split <= MaxSplit; ++Split)
18996     if (RVT.getScalarSizeInBits() % Split == 0)
18997       if (SDValue S = BuildClearMask(Split))
18998         return S;
18999 
19000   return SDValue();
19001 }
19002 
19003 /// If a vector binop is performed on splat values, it may be profitable to
19004 /// extract, scalarize, and insert/splat.
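/// For example (illustrative):
///   fdiv (splat X, 0), (splat Y, 0)
///     --> splat (fdiv (extelt X, 0), (extelt Y, 0)), 0
/// replaces a full-width vector divide with a single scalar divide.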
19005 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
19006   SDValue N0 = N->getOperand(0);
19007   SDValue N1 = N->getOperand(1);
19008   unsigned Opcode = N->getOpcode();
19009   EVT VT = N->getValueType(0);
19010   EVT EltVT = VT.getVectorElementType();
19011   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19012 
19013   // TODO: Remove/replace the extract cost check? If the elements are available
19014   //       as scalars, then there may be no extract cost. Should we ask if
19015   //       inserting a scalar back into a vector is cheap instead?
19016   int Index0, Index1;
19017   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19018   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19019   if (!Src0 || !Src1 || Index0 != Index1 ||
19020       Src0.getValueType().getVectorElementType() != EltVT ||
19021       Src1.getValueType().getVectorElementType() != EltVT ||
19022       !TLI.isExtractVecEltCheap(VT, Index0) ||
19023       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19024     return SDValue();
19025 
19026   SDLoc DL(N);
19027   SDValue IndexC =
19028       DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19029   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19030   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19031   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19032 
19033   // If all lanes but 1 are undefined, no need to splat the scalar result.
19034   // TODO: Keep track of undefs and use that info in the general case.
19035   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19036       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19037       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19038     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19039     // build_vec ..undef, (bo X, Y), undef...
19040     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
19041     Ops[Index0] = ScalarBO;
19042     return DAG.getBuildVector(VT, DL, Ops);
19043   }
19044 
19045   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
19046   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
19047   return DAG.getBuildVector(VT, DL, Ops);
19048 }
19049 
19050 /// Visit a binary vector operation, like ADD.
19051 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19052   assert(N->getValueType(0).isVector() &&
19053          "SimplifyVBinOp only works on vectors!");
19054 
19055   SDValue LHS = N->getOperand(0);
19056   SDValue RHS = N->getOperand(1);
19057   SDValue Ops[] = {LHS, RHS};
19058   EVT VT = N->getValueType(0);
19059   unsigned Opcode = N->getOpcode();
19060 
19061   // See if we can constant fold the vector operation.
19062   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19063           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19064     return Fold;
19065 
19066   // Move unary shuffles with identical masks after a vector binop:
19067   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19068   //   --> shuffle (VBinOp A, B), Undef, Mask
19069   // This does not require type legality checks because we are creating the
19070   // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
19072   // though. This code is adapted from the identical transform in instcombine.
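  // For example (illustrative): for sdiv, operand B could hold a zero in a
  // lane that the shuffle mask never selects; hoisting the sdiv above the
  // shuffles would then divide by zero in that discarded lane.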
19073   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19074       Opcode != ISD::UREM && Opcode != ISD::SREM &&
19075       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19076     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19077     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19078     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19079         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19080         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19081       SDLoc DL(N);
19082       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19083                                      RHS.getOperand(0), N->getFlags());
19084       SDValue UndefV = LHS.getOperand(1);
19085       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19086     }
19087   }
19088 
19089   // The following pattern is likely to emerge with vector reduction ops. Moving
19090   // the binary operation ahead of insertion may allow using a narrower vector
19091   // instruction that has better performance than the wide version of the op:
19092   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19093   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19094       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19095       LHS.getOperand(2) == RHS.getOperand(2) &&
19096       (LHS.hasOneUse() || RHS.hasOneUse())) {
19097     SDValue X = LHS.getOperand(1);
19098     SDValue Y = RHS.getOperand(1);
19099     SDValue Z = LHS.getOperand(2);
19100     EVT NarrowVT = X.getValueType();
19101     if (NarrowVT == Y.getValueType() &&
19102         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19103       // (binop undef, undef) may not return undef, so compute that result.
19104       SDLoc DL(N);
19105       SDValue VecC =
19106           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19107       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19108       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19109     }
19110   }
19111 
19112   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19113     return V;
19114 
19115   return SDValue();
19116 }
19117 
19118 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19119                                     SDValue N2) {
19120   assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
19121 
19122   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19123                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
19124 
19125   // If we got a simplified select_cc node back from SimplifySelectCC, then
19126   // break it down into a new SETCC node, and a new SELECT node, and then return
19127   // the SELECT node, since we were called with a SELECT node.
19128   if (SCC.getNode()) {
19129     // Check to see if we got a select_cc back (to turn into setcc/select).
19130     // Otherwise, just return whatever node we got back, like fabs.
19131     if (SCC.getOpcode() == ISD::SELECT_CC) {
19132       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
19133                                   N0.getValueType(),
19134                                   SCC.getOperand(0), SCC.getOperand(1),
19135                                   SCC.getOperand(4));
19136       AddToWorklist(SETCC.getNode());
19137       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19138                            SCC.getOperand(2), SCC.getOperand(3));
19139     }
19140 
19141     return SCC;
19142   }
19143   return SDValue();
19144 }
19145 
19146 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19147 /// being selected between, see if we can simplify the select.  Callers of this
19148 /// should assume that TheSelect is deleted if this returns true.  As such, they
19149 /// should return the appropriate thing (e.g. the node) back to the top-level of
19150 /// the DAG combiner loop to avoid it being looked at.
19151 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19152                                     SDValue RHS) {
19153   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19154   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19155   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19156     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19157       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19158       SDValue Sqrt = RHS;
19159       ISD::CondCode CC;
19160       SDValue CmpLHS;
19161       const ConstantFPSDNode *Zero = nullptr;
19162 
19163       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19164         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19165         CmpLHS = TheSelect->getOperand(0);
19166         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19167       } else {
19168         // SELECT or VSELECT
19169         SDValue Cmp = TheSelect->getOperand(0);
19170         if (Cmp.getOpcode() == ISD::SETCC) {
19171           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19172           CmpLHS = Cmp.getOperand(0);
19173           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19174         }
19175       }
19176       if (Zero && Zero->isZero() &&
19177           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19178           CC == ISD::SETULT || CC == ISD::SETLT)) {
19179         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19180         CombineTo(TheSelect, Sqrt);
19181         return true;
19182       }
19183     }
19184   }
19185   // Cannot simplify select with vector condition
19186   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19187 
19188   // If this is a select from two identical things, try to pull the operation
19189   // through the select.
19190   if (LHS.getOpcode() != RHS.getOpcode() ||
19191       !LHS.hasOneUse() || !RHS.hasOneUse())
19192     return false;
19193 
19194   // If this is a load and the token chain is identical, replace the select
19195   // of two loads with a load through a select of the address to load from.
19196   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19197   // constants have been dropped into the constant pool.
19198   if (LHS.getOpcode() == ISD::LOAD) {
19199     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19200     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19201 
19202     // Token chains must be identical.
19203     if (LHS.getOperand(0) != RHS.getOperand(0) ||
19204         // Do not let this transformation reduce the number of volatile loads.
19205         LLD->isVolatile() || RLD->isVolatile() ||
19206         // FIXME: If either is a pre/post inc/dec load,
19207         // we'd need to split out the address adjustment.
19208         LLD->isIndexed() || RLD->isIndexed() ||
19209         // If this is an EXTLOAD, the VTs must match.
19210         LLD->getMemoryVT() != RLD->getMemoryVT() ||
19211         // If this is an EXTLOAD, the kind of extension must match.
19212         (LLD->getExtensionType() != RLD->getExtensionType() &&
19213          // The only exception is if one of the extensions is anyext.
19214          LLD->getExtensionType() != ISD::EXTLOAD &&
19215          RLD->getExtensionType() != ISD::EXTLOAD) ||
19216         // FIXME: this discards src value information.  This is
19217         // over-conservative. It would be beneficial to be able to remember
19218         // both potential memory locations.  Since we are discarding
19219         // src value info, don't do the transformation if the memory
19220         // locations are not in the default address space.
19221         LLD->getPointerInfo().getAddrSpace() != 0 ||
19222         RLD->getPointerInfo().getAddrSpace() != 0 ||
19223         // We can't produce a CMOV of a TargetFrameIndex since we won't
19224         // generate the address generation required.
19225         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19226         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19227         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19228                                       LLD->getBasePtr().getValueType()))
19229       return false;
19230 
19231     // The loads must not depend on one another.
19232     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19233       return false;
19234 
19235     // Check that the select condition doesn't reach either load.  If so,
19236     // folding this will induce a cycle into the DAG.  If not, this is safe to
19237     // xform, so create a select of the addresses.
19238 
19239     SmallPtrSet<const SDNode *, 32> Visited;
19240     SmallVector<const SDNode *, 16> Worklist;
19241 
19242     // Always fail if LLD and RLD are not independent. TheSelect is a
19243     // predecessor to all nodes in question, so we need not search past it.
19244 
19245     Visited.insert(TheSelect);
19246     Worklist.push_back(LLD);
19247     Worklist.push_back(RLD);
19248 
19249     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19250         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19251       return false;
19252 
19253     SDValue Addr;
19254     if (TheSelect->getOpcode() == ISD::SELECT) {
19255       // We cannot do this optimization if any pair of {RLD, LLD} is a
19256       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19257       // Loads, we only need to check if CondNode is a successor to one of the
19258       // loads. We can further avoid this if there's no use of their chain
19259       // value.
19260       SDNode *CondNode = TheSelect->getOperand(0).getNode();
19261       Worklist.push_back(CondNode);
19262 
19263       if ((LLD->hasAnyUseOfValue(1) &&
19264            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19265           (RLD->hasAnyUseOfValue(1) &&
19266            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19267         return false;
19268 
19269       Addr = DAG.getSelect(SDLoc(TheSelect),
19270                            LLD->getBasePtr().getValueType(),
19271                            TheSelect->getOperand(0), LLD->getBasePtr(),
19272                            RLD->getBasePtr());
19273     } else {  // Otherwise SELECT_CC
19274       // We cannot do this optimization if any pair of {RLD, LLD} is a
19275       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19276       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19277       // one of the loads. We can further avoid this if there's no use of their
19278       // chain value.
19279 
19280       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19281       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19282       Worklist.push_back(CondLHS);
19283       Worklist.push_back(CondRHS);
19284 
19285       if ((LLD->hasAnyUseOfValue(1) &&
19286            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19287           (RLD->hasAnyUseOfValue(1) &&
19288            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19289         return false;
19290 
19291       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19292                          LLD->getBasePtr().getValueType(),
19293                          TheSelect->getOperand(0),
19294                          TheSelect->getOperand(1),
19295                          LLD->getBasePtr(), RLD->getBasePtr(),
19296                          TheSelect->getOperand(4));
19297     }
19298 
19299     SDValue Load;
19300     // It is safe to replace the two loads if they have different alignments,
19301     // but the new load must be the minimum (most restrictive) alignment of the
19302     // inputs.
19303     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19304     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19305     if (!RLD->isInvariant())
19306       MMOFlags &= ~MachineMemOperand::MOInvariant;
19307     if (!RLD->isDereferenceable())
19308       MMOFlags &= ~MachineMemOperand::MODereferenceable;
19309     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19310       // FIXME: Discards pointer and AA info.
19311       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19312                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19313                          MMOFlags);
19314     } else {
19315       // FIXME: Discards pointer and AA info.
19316       Load = DAG.getExtLoad(
19317           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19318                                                   : LLD->getExtensionType(),
19319           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19320           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19321     }
19322 
19323     // Users of the select now use the result of the load.
19324     CombineTo(TheSelect, Load);
19325 
19326     // Users of the old loads now use the new load's chain.  We know the
19327     // old-load value is dead now.
19328     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19329     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19330     return true;
19331   }
19332 
19333   return false;
19334 }
19335 
19336 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19337 /// bitwise 'and'.
19338 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19339                                             SDValue N1, SDValue N2, SDValue N3,
19340                                             ISD::CondCode CC) {
19341   // If this is a select where the false operand is zero and the compare is a
19342   // check of the sign bit, see if we can perform the "gzip trick":
19343   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19344   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
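  // For example, for i32 X (illustrative):
  //   select_cc setlt X, 0, A, 0 --> and (sra X, 31), A
  // since (sra X, 31) is all-ones exactly when X is negative and all-zeros
  // otherwise, the 'and' yields A or 0 as required.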
19345   EVT XType = N0.getValueType();
19346   EVT AType = N2.getValueType();
19347   if (!isNullConstant(N3) || !XType.bitsGE(AType))
19348     return SDValue();
19349 
19350   // If the comparison is testing for a positive value, we have to invert
19351   // the sign bit mask, so only do that transform if the target has a bitwise
19352   // 'and not' instruction (the invert is free).
19353   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19354     // (X > -1) ? A : 0
19355     // (X >  0) ? X : 0 <-- This is canonical signed max.
19356     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19357       return SDValue();
19358   } else if (CC == ISD::SETLT) {
19359     // (X <  0) ? A : 0
19360     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
19361     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19362       return SDValue();
19363   } else {
19364     return SDValue();
19365   }
19366 
19367   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19368   // constant.
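  // For example, if A == 8 (bit 3) and X is i32, then ShCt == 32 - 3 - 1 == 28,
  // so (srl X, 28) places the sign bit of X at bit 3, and the 'and' with 8
  // yields 8 or 0 without materializing an all-ones mask.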
19369   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19370   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19371   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19372     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19373     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19374     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19375     AddToWorklist(Shift.getNode());
19376 
19377     if (XType.bitsGT(AType)) {
19378       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19379       AddToWorklist(Shift.getNode());
19380     }
19381 
19382     if (CC == ISD::SETGT)
19383       Shift = DAG.getNOT(DL, Shift, AType);
19384 
19385     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19386   }
19387 
19388   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19389   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19390   AddToWorklist(Shift.getNode());
19391 
19392   if (XType.bitsGT(AType)) {
19393     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19394     AddToWorklist(Shift.getNode());
19395   }
19396 
19397   if (CC == ISD::SETGT)
19398     Shift = DAG.getNOT(DL, Shift, AType);
19399 
19400   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19401 }
19402 
19403 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
19404 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19405 /// in it. This may be a win when the constant is not otherwise available
19406 /// because it replaces two constant pool loads with one.
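/// For example (illustrative of the layout this routine constructs):
///   (a < b) ? 1.0f : 2.0f
///     --> load (ConstPool<{2.0f, 1.0f}> + ((a < b) ? 4 : 0))
/// The array is built as {FV, TV}, so a true condition selects the entry at
/// the higher offset.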
19407 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19408     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19409     ISD::CondCode CC) {
19410   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
19411     return SDValue();
19412 
19413   // If we are before legalize types, we want the other legalization to happen
19414   // first (for example, to avoid messing with soft float).
19415   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19416   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19417   EVT VT = N2.getValueType();
19418   if (!TV || !FV || !TLI.isTypeLegal(VT))
19419     return SDValue();
19420 
19421   // If a constant can be materialized without loads, this does not make sense.
19422   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19423       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19424       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19425     return SDValue();
19426 
19427   // If both constants have multiple uses, then we won't need to do an extra
19428   // load. The values are likely around in registers for other users.
19429   if (!TV->hasOneUse() && !FV->hasOneUse())
19430     return SDValue();
19431 
19432   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19433                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19434   Type *FPTy = Elts[0]->getType();
19435   const DataLayout &TD = DAG.getDataLayout();
19436 
19437   // Create a ConstantArray of the two constants.
19438   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19439   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19440                                       TD.getPrefTypeAlignment(FPTy));
19441   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19442 
19443   // Get offsets to the 0 and 1 elements of the array, so we can select between
19444   // them.
19445   SDValue Zero = DAG.getIntPtrConstant(0, DL);
19446   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19447   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19448   SDValue Cond =
19449       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19450   AddToWorklist(Cond.getNode());
19451   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19452   AddToWorklist(CstOffset.getNode());
19453   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19454   AddToWorklist(CPIdx.getNode());
19455   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19456                      MachinePointerInfo::getConstantPool(
19457                          DAG.getMachineFunction()), Alignment);
19458 }
19459 
19460 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19461 /// where 'cond' is the comparison specified by CC.
19462 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19463                                       SDValue N2, SDValue N3, ISD::CondCode CC,
19464                                       bool NotExtCompare) {
19465   // (x ? y : y) -> y.
19466   if (N2 == N3) return N2;
19467 
19468   EVT CmpOpVT = N0.getValueType();
19469   EVT CmpResVT = getSetCCResultType(CmpOpVT);
19470   EVT VT = N2.getValueType();
19471   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19472   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19473   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19474 
19475   // Determine if the condition we're dealing with is constant.
19476   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19477     AddToWorklist(SCC.getNode());
19478     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
19479       // fold select_cc true, x, y -> x
19480       // fold select_cc false, x, y -> y
19481       return !(SCCC->isNullValue()) ? N2 : N3;
19482     }
19483   }
19484 
19485   if (SDValue V =
19486           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
19487     return V;
19488 
19489   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
19490     return V;
19491 
19492   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
19493   // where y has a single bit set.
19494   // A plaintext description would be: we can turn the SELECT_CC into an AND
19495   // when the condition can be materialized as an all-ones register.  Any
19496   // single bit-test can be materialized as an all-ones register with
19497   // shift-left and shift-right-arith.
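  // For example, for i32 x and y == 4 (bit 2), illustratively:
  //   select_cc seteq (and x, 4), 0, 0, A
  //     --> and (sra (shl x, 29), 31), A
  // The shl moves bit 2 into the sign bit; the sra then smears it into an
  // all-ones mask (bit set, selecting A) or an all-zeros mask (bit clear,
  // selecting 0).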
19498   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
19499       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
19500     SDValue AndLHS = N0->getOperand(0);
19501     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
19502     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
19503       // Shift the tested bit over the sign bit.
19504       const APInt &AndMask = ConstAndRHS->getAPIntValue();
19505       SDValue ShlAmt =
19506         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
19507                         getShiftAmountTy(AndLHS.getValueType()));
19508       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
19509 
19510       // Now arithmetic right shift it all the way over, so the result is either
19511       // all-ones, or zero.
19512       SDValue ShrAmt =
19513         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
19514                         getShiftAmountTy(Shl.getValueType()));
19515       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
19516 
19517       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
19518     }
19519   }
19520 
19521   // fold select C, 16, 0 -> shl C, 4
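  // The compare materializes as 0 or 1 (ZeroOrOneBooleanContent), so after
  // zero-extension a left shift by log2(16) == 4 yields 0 or 16 directly.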
19522   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
19523   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
19524 
19525   if ((Fold || Swap) &&
19526       TLI.getBooleanContents(CmpOpVT) ==
19527           TargetLowering::ZeroOrOneBooleanContent &&
19528       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
19529 
19530     if (Swap) {
19531       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
19532       std::swap(N2C, N3C);
19533     }
19534 
19535     // If the caller doesn't want us to simplify this into a zext of a compare,
19536     // don't do it.
19537     if (NotExtCompare && N2C->isOne())
19538       return SDValue();
19539 
19540     SDValue Temp, SCC;
19541     // zext (setcc n0, n1)
19542     if (LegalTypes) {
19543       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
19544       if (VT.bitsLT(SCC.getValueType()))
19545         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
19546       else
19547         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
19548     } else {
19549       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
19550       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
19551     }
19552 
19553     AddToWorklist(SCC.getNode());
19554     AddToWorklist(Temp.getNode());
19555 
19556     if (N2C->isOne())
19557       return Temp;
19558 
19559     // shl setcc result by log2 n2c
19560     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
19561                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
19562                                        SDLoc(Temp),
19563                                        getShiftAmountTy(Temp.getValueType())));
19564   }
19565 
19566   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
19567   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
19568   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
19569   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
19570   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
19571   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
19572   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
19573   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
19574   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
19575     SDValue ValueOnZero = N2;
19576     SDValue Count = N3;
19577     // If the condition is NE instead of EQ, swap the operands.
19578     if (CC == ISD::SETNE)
19579       std::swap(ValueOnZero, Count);
19580     // Check if the value on zero is a constant equal to the bits in the type.
19581     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
19582       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
19583         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
19584         // legal, combine to just cttz.
19585         if ((Count.getOpcode() == ISD::CTTZ ||
19586              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
19587             N0 == Count.getOperand(0) &&
19588             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
19589           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
19590         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
19591         // legal, combine to just ctlz.
19592         if ((Count.getOpcode() == ISD::CTLZ ||
19593              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
19594             N0 == Count.getOperand(0) &&
19595             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
19596           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
19597       }
19598     }
19599   }
19600 
19601   return SDValue();
19602 }
19603 
19604 /// This is a stub for TargetLowering::SimplifySetCC.
19605 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
19606                                    ISD::CondCode Cond, const SDLoc &DL,
19607                                    bool foldBooleans) {
19608   TargetLowering::DAGCombinerInfo
19609     DagCombineInfo(DAG, Level, false, this);
19610   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
19611 }
19612 
19613 /// Given an ISD::SDIV node expressing a divide by constant, return a DAG
19614 /// expression that will generate the same value by multiplying by a magic
19615 /// number.
19616 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
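/// For example, an i32 sdiv by 7 becomes roughly (illustrative; the magic
/// constant and fixup steps are chosen by TargetLowering::BuildSDIV per
/// divisor and bit width):
///   q = mulhs(x, 0x92492493); q = q + x; q = sra(q, 2); q = q + srl(x, 31)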
19617 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
19618   // When optimizing for minimum size, we don't want to expand a div to a mul
19619   // and a shift.
19620   if (DAG.getMachineFunction().getFunction().hasMinSize())
19621     return SDValue();
19622 
19623   SmallVector<SDNode *, 8> Built;
19624   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
19625     for (SDNode *N : Built)
19626       AddToWorklist(N);
19627     return S;
19628   }
19629 
19630   return SDValue();
19631 }
19632 
19633 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
19634 /// DAG expression that will generate the same value by right shifting.
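/// For example, an i32 sdiv by 4 can be built as (sketch):
///   t = sra(x, 31);    // all-ones if x is negative, else zero
///   t = srl(t, 30);    // divisor-1 == 3 if x is negative, else 0
///   q = sra(x + t, 2); // biased shift rounds towards zero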
19635 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
19636   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
19637   if (!C)
19638     return SDValue();
19639 
19640   // Avoid division by zero.
19641   if (C->isNullValue())
19642     return SDValue();
19643 
19644   SmallVector<SDNode *, 8> Built;
19645   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
19646     for (SDNode *N : Built)
19647       AddToWorklist(N);
19648     return S;
19649   }
19650 
19651   return SDValue();
19652 }
19653 
19654 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
19655 /// expression that will generate the same value by multiplying by a magic
19656 /// number.
19657 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
19658 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
19659   // When optimizing for minimum size, we don't want to expand a div to a mul
19660   // and a shift.
19661   if (DAG.getMachineFunction().getFunction().hasMinSize())
19662     return SDValue();
19663 
19664   SmallVector<SDNode *, 8> Built;
19665   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
19666     for (SDNode *N : Built)
19667       AddToWorklist(N);
19668     return S;
19669   }
19670 
19671   return SDValue();
19672 }
19673 
19674 /// Determines the LogBase2 value for a non-null input value using the
19675 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
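/// For example, for an i32 value V == 16: ctlz(16) == 27, so
/// LogBase2 == (32 - 1) - 27 == 4.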
19676 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
19677   EVT VT = V.getValueType();
19678   unsigned EltBits = VT.getScalarSizeInBits();
19679   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
19680   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
19681   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
19682   return LogBase2;
19683 }
19684 
19685 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
19686 /// For the reciprocal, we need to find the zero of the function:
19687 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
19688 ///     =>
19689 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
19690 ///     does not require additional intermediate precision]
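/// For example, with A == 3 and initial estimate X_0 == 0.3 (illustrative):
///   X_1 = 0.3  * (2 - 3 * 0.3)  = 0.33
///   X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333
/// converging quadratically towards 1/3.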
19691 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
19692   if (Level >= AfterLegalizeDAG)
19693     return SDValue();
19694 
19695   // TODO: Handle half and/or extended types?
19696   EVT VT = Op.getValueType();
19697   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19698     return SDValue();
19699 
19700   // If estimates are explicitly disabled for this function, we're done.
19701   MachineFunction &MF = DAG.getMachineFunction();
19702   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
19703   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19704     return SDValue();
19705 
19706   // Estimates may be explicitly enabled for this type with a custom number of
19707   // refinement steps.
19708   int Iterations = TLI.getDivRefinementSteps(VT, MF);
19709   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
19710     AddToWorklist(Est.getNode());
19711 
19712     if (Iterations) {
19713       SDLoc DL(Op);
19714       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
19715 
19716       // Newton iterations: Est = Est + Est (1 - Arg * Est)
19717       for (int i = 0; i < Iterations; ++i) {
19718         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
19719         AddToWorklist(NewEst.getNode());
19720 
19721         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
19722         AddToWorklist(NewEst.getNode());
19723 
19724         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19725         AddToWorklist(NewEst.getNode());
19726 
19727         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
19728         AddToWorklist(Est.getNode());
19729       }
19730     }
19731     return Est;
19732   }
19733 
19734   return SDValue();
19735 }
19736 
19737 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
19738 /// For the reciprocal sqrt, we need to find the zero of the function:
19739 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19740 ///     =>
19741 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
19742 /// As a result, we precompute A/2 prior to the iteration loop.
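/// For example, with A == 2 and initial estimate X_0 == 0.7 (illustrative):
///   X_1 = 0.7 * (1.5 - (2/2) * 0.7^2) = 0.7 * 1.01 = 0.707
/// which is already close to 1/sqrt(2) ~= 0.7071.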
19743 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
19744                                          unsigned Iterations,
19745                                          SDNodeFlags Flags, bool Reciprocal) {
19746   EVT VT = Arg.getValueType();
19747   SDLoc DL(Arg);
19748   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
19749 
19750   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
19751   // this entire sequence requires only one FP constant.
19752   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
19753   AddToWorklist(HalfArg.getNode());
19754 
19755   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
19756   AddToWorklist(HalfArg.getNode());
19757 
19758   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
19759   for (unsigned i = 0; i < Iterations; ++i) {
19760     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
19761     AddToWorklist(NewEst.getNode());
19762 
19763     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
19764     AddToWorklist(NewEst.getNode());
19765 
19766     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
19767     AddToWorklist(NewEst.getNode());
19768 
19769     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
19770     AddToWorklist(Est.getNode());
19771   }
19772 
19773   // If non-reciprocal square root is requested, multiply the result by Arg.
19774   if (!Reciprocal) {
19775     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
19776     AddToWorklist(Est.getNode());
19777   }
19778 
19779   return Est;
19780 }
19781 
19782 /// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
19783 /// For the reciprocal sqrt, we need to find the zero of the function:
19784 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
19785 ///     =>
19786 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
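/// This is algebraically the same update as the one-constant form:
///   (-0.5 * X_i) * (A * X_i^2 - 3.0) == X_i * (1.5 - 0.5 * A * X_i^2)
/// e.g. for A == 2, X_i == 0.7: (-0.35) * (0.98 - 3.0) = 0.707.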
19787 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
19788                                          unsigned Iterations,
19789                                          SDNodeFlags Flags, bool Reciprocal) {
19790   EVT VT = Arg.getValueType();
19791   SDLoc DL(Arg);
19792   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
19793   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
19794 
19795   // This routine must enter the loop below to work correctly
19796   // when (Reciprocal == false).
19797   assert(Iterations > 0);
19798 
19799   // Newton iterations for reciprocal square root:
19800   // E = (E * -0.5) * ((A * E) * E + -3.0)
19801   for (unsigned i = 0; i < Iterations; ++i) {
19802     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
19803     AddToWorklist(AE.getNode());
19804 
19805     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
19806     AddToWorklist(AEE.getNode());
19807 
19808     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
19809     AddToWorklist(RHS.getNode());
19810 
19811     // When calculating a square root at the last iteration build:
19812     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
19813     // (notice a common subexpression)
19814     SDValue LHS;
19815     if (Reciprocal || (i + 1) < Iterations) {
19816       // RSQRT: LHS = (E * -0.5)
19817       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
19818     } else {
19819       // SQRT: LHS = (A * E) * -0.5
19820       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
19821     }
19822     AddToWorklist(LHS.getNode());
19823 
19824     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
19825     AddToWorklist(Est.getNode());
19826   }
19827 
19828   return Est;
19829 }
19830 
19831 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
19832 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
19833 /// Op can be zero.
19834 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
19835                                            bool Reciprocal) {
19836   if (Level >= AfterLegalizeDAG)
19837     return SDValue();
19838 
19839   // TODO: Handle half and/or extended types?
19840   EVT VT = Op.getValueType();
19841   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
19842     return SDValue();
19843 
19844   // If estimates are explicitly disabled for this function, we're done.
19845   MachineFunction &MF = DAG.getMachineFunction();
19846   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
19847   if (Enabled == TLI.ReciprocalEstimate::Disabled)
19848     return SDValue();
19849 
19850   // Estimates may be explicitly enabled for this type with a custom number of
19851   // refinement steps.
19852   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
19853 
19854   bool UseOneConstNR = false;
19855   if (SDValue Est =
19856       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
19857                           Reciprocal)) {
19858     AddToWorklist(Est.getNode());
19859 
19860     if (Iterations) {
19861       Est = UseOneConstNR
19862             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
19863             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
19864 
19865       if (!Reciprocal) {
19866         // The estimate is now completely wrong if the input was exactly 0.0 or
19867         // possibly a denormal. Force the answer to 0.0 for those cases.
19868         SDLoc DL(Op);
19869         EVT CCVT = getSetCCResultType(VT);
19870         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
19871         const Function &F = DAG.getMachineFunction().getFunction();
19872         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
19873         if (Denorms.getValueAsString().equals("ieee")) {
19874           // fabs(X) < SmallestNormal ? 0.0 : Est
19875           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
19876           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
19877           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
19878           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19879           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
19880           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
19881           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
19882           AddToWorklist(Fabs.getNode());
19883           AddToWorklist(IsDenorm.getNode());
19884           AddToWorklist(Est.getNode());
19885         } else {
19886           // X == 0.0 ? 0.0 : Est
19887           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
19888           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
19889           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
19890           AddToWorklist(IsZero.getNode());
19891           AddToWorklist(Est.getNode());
19892         }
19893       }
19894     }
19895     return Est;
19896   }
19897 
19898   return SDValue();
19899 }
19900 
19901 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19902   return buildSqrtEstimateImpl(Op, Flags, true);
19903 }
19904 
19905 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
19906   return buildSqrtEstimateImpl(Op, Flags, false);
19907 }
19908 
19909 /// Return true if there is any possibility that the two addresses overlap.
19910 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
19911 
19912   struct MemUseCharacteristics {
19913     bool IsVolatile;
19914     SDValue BasePtr;
19915     int64_t Offset;
19916     Optional<int64_t> NumBytes;
19917     MachineMemOperand *MMO;
19918   };
19919 
19920   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
19921     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
19922       int64_t Offset = 0;
19923       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
19924         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
19925                      ? C->getSExtValue()
19926                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
19927                            ? -1 * C->getSExtValue()
19928                            : 0;
19929       return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
19930               Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
19931               LSN->getMemOperand()};
19932     }
19933     if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
19934       return {false /*isVolatile*/, LN->getOperand(1),
19935               (LN->hasOffset()) ? LN->getOffset() : 0,
19936               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
19937                                 : Optional<int64_t>(),
19938               (MachineMemOperand *)nullptr};
19939     // Default.
19940     return {false /*isVolatile*/, SDValue(), (int64_t)0 /*offset*/,
19941             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
19942   };
19943 
19944   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
19945                         MUC1 = getCharacteristics(Op1);
19946 
19947   // If they are to the same address, then they must be aliases.
19948   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
19949       MUC0.Offset == MUC1.Offset)
19950     return true;
19951 
19952   // If they are both volatile then they cannot be reordered.
19953   if (MUC0.IsVolatile && MUC1.IsVolatile)
19954     return true;
19955 
19956   if (MUC0.MMO && MUC1.MMO) {
19957     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
19958         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
19959       return false;
19960   }
19961 
19962   // Try to prove that there is aliasing, or that there is no aliasing. Either
19963   // way, we can return now. If nothing can be proved, proceed with more tests.
19964   bool IsAlias;
19965   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
19966                                        DAG, IsAlias))
19967     return IsAlias;
19968 
19969   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
19970   // either are not known.
19971   if (!MUC0.MMO || !MUC1.MMO)
19972     return true;
19973 
19974   // If one operation reads from invariant memory, and the other may store, they
19975   // cannot alias. These should really be checking the equivalent of mayWrite,
19976   // but it only matters for memory nodes other than load/store.
19977   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
19978       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
19979     return false;
19980 
19981   // If we know required SrcValue1 and SrcValue2 have relatively large
19982   // alignment compared to the size and offset of the access, we may be able
19983   // to prove they do not alias. This check is conservative for now to catch
19984   // cases created by splitting vector types.
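  // For example, two 4-byte accesses based on the same 16-byte-aligned value
  // at source offsets 0 and 8 fall into disjoint alignment windows
  // (0 + 4 <= 8), so they cannot overlap.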
19985   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
19986   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
19987   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
19988   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
19989   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
19990       MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
19991       *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
19992     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
19993     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
19994 
19995     // There is no overlap between these relatively aligned accesses of
19996     // similar size. Return no alias.
19997     if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
19998         (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
19999       return false;
20000   }
20001 
20002   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20003                    ? CombinerGlobalAA
20004                    : DAG.getSubtarget().useAA();
20005 #ifndef NDEBUG
20006   if (CombinerAAOnlyFunc.getNumOccurrences() &&
20007       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
20008     UseAA = false;
20009 #endif
20010 
20011   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20012     // Use alias analysis information.
20013     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20014     int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20015     int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20016     AliasResult AAResult = AA->alias(
20017         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20018                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20019         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20020                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20021     if (AAResult == NoAlias)
20022       return false;
20023   }
20024 
20025   // Otherwise we have to assume they alias.
20026   return true;
20027 }
20028 
20029 /// Walk up chain skipping non-aliasing memory nodes,
20030 /// looking for aliasing nodes and adding them to the Aliases vector.
20031 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20032                                    SmallVectorImpl<SDValue> &Aliases) {
20033   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
20034   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
20035 
20036   // Get alias information for node.
20037   const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
20038 
20039   // Starting off.
20040   Chains.push_back(OriginalChain);
20041   unsigned Depth = 0;
20042 
20043   // Attempt to improve the chain by a single step.
20044   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20045     switch (C.getOpcode()) {
20046     case ISD::EntryToken:
20047       // No need to mark EntryToken.
20048       C = SDValue();
20049       return true;
20050     case ISD::LOAD:
20051     case ISD::STORE: {
20052       // Get alias information for C.
20053       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20054                       !cast<LSBaseSDNode>(C.getNode())->isVolatile();
20055       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20056         // Look further up the chain.
20057         C = C.getOperand(0);
20058         return true;
20059       }
20060       // Alias, so stop here.
20061       return false;
20062     }
20063 
20064     case ISD::CopyFromReg:
20065       // Always forward past CopyFromReg.
20066       C = C.getOperand(0);
20067       return true;
20068 
20069     case ISD::LIFETIME_START:
20070     case ISD::LIFETIME_END: {
20071       // We can forward past any lifetime start/end that can be proven not to
20072       // alias the memory access.
20073       if (!isAlias(N, C.getNode())) {
20074         // Look further up the chain.
20075         C = C.getOperand(0);
20076         return true;
20077       }
20078       return false;
20079     }
20080     default:
20081       return false;
20082     }
20083   };
20084 
20085   // Look at each chain and determine if it is an alias.  If so, add it to the
20086   // aliases list.  If not, then continue up the chain looking for the next
20087   // candidate.
20088   while (!Chains.empty()) {
20089     SDValue Chain = Chains.pop_back_val();
20090 
20091     // Don't bother if we've seen Chain before.
20092     if (!Visited.insert(Chain.getNode()).second)
20093       continue;
20094 
20095     // For TokenFactor nodes, look at each operand and only continue up the
20096     // chain until we reach the depth limit.
20097     //
20098     // FIXME: The depth check could be made to return the last non-aliasing
20099     // chain we found before we hit a tokenfactor rather than the original
20100     // chain.
20101     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20102       Aliases.clear();
20103       Aliases.push_back(OriginalChain);
20104       return;
20105     }
20106 
20107     if (Chain.getOpcode() == ISD::TokenFactor) {
20108       // We have to check each of the operands of the token factor for "small"
20109       // token factors, so we queue them up.  Adding the operands to the queue
20110       // (stack) in reverse order maintains the original order and increases the
20111       // likelihood that getNode will find a matching token factor (CSE).
20112       if (Chain.getNumOperands() > 16) {
20113         Aliases.push_back(Chain);
20114         continue;
20115       }
20116       for (unsigned n = Chain.getNumOperands(); n;)
20117         Chains.push_back(Chain.getOperand(--n));
20118       ++Depth;
20119       continue;
20120     }
20121     // Everything else
20122     if (ImproveChain(Chain)) {
20123       // Updated chain found; consider the new chain if one exists.
20124       if (Chain.getNode())
20125         Chains.push_back(Chain);
20126       ++Depth;
20127       continue;
20128     }
20129     // No improved chain possible; treat as an alias.
20130     Aliases.push_back(Chain);
20131   }
20132 }
20133 
20134 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20135 /// (aliasing node).
20136 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20137   if (OptLevel == CodeGenOpt::None)
20138     return OldChain;
20139 
20140   // Ops for replacing token factor.
20141   SmallVector<SDValue, 8> Aliases;
20142 
20143   // Accumulate all the aliases to this node.
20144   GatherAllAliases(N, OldChain, Aliases);
20145 
20146   // If no operands then chain to entry token.
20147   if (Aliases.size() == 0)
20148     return DAG.getEntryNode();
20149 
20150   // If a single operand then chain to it.  We don't need to revisit it.
20151   if (Aliases.size() == 1)
20152     return Aliases[0];
20153 
20154   // Construct a custom tailored token factor.
20155   return DAG.getTokenFactor(SDLoc(N), Aliases);
20156 }
20157 
20158 namespace {
20159 // TODO: Replace with std::monostate when we move to C++17.
20160 struct UnitT { } Unit;
20161 bool operator==(const UnitT &, const UnitT &) { return true; }
20162 bool operator!=(const UnitT &, const UnitT &) { return false; }
20163 } // namespace
20164 
20165 // This function tries to collect a bunch of potentially interesting
20166 // nodes to improve the chains of, all at once. This might seem
20167 // redundant, as this function gets called when visiting every store
20168 // node, so why not let the work be done on each store as it's visited?
20169 //
20170 // I believe this is mainly important because MergeConsecutiveStores
20171 // is unable to deal with merging stores of different sizes, so unless
20172 // we improve the chains of all the potential candidates up-front
20173 // before running MergeConsecutiveStores, it might only see some of
20174 // the nodes that will eventually be candidates, and then not be able
20175 // to go from a partially-merged state to the desired final
20176 // fully-merged state.
20177 
20178 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
20179   SmallVector<StoreSDNode *, 8> ChainedStores;
20180   StoreSDNode *STChain = St;
20181   // Intervals records which offsets from BaseIndex have been covered. In
20182   // the common case, every store writes immediately adjacent to the previous
20183   // one and is thus merged with the previous interval at insertion time.
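  // For example, chained 4-byte stores at offsets 0, 4, and 8 coalesce into
  // the single interval [0, 12), since IntervalMap merges adjacent
  // equal-valued intervals.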
20184 
20185   using IMap =
20186       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
20187   IMap::Allocator A;
20188   IMap Intervals(A);
20189 
20190   // This holds the base pointer, index, and the offset in bytes from the base
20191   // pointer.
20192   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20193 
20194   // We must have a base and an offset.
20195   if (!BasePtr.getBase().getNode())
20196     return false;
20197 
20198   // Do not handle stores to undef base pointers.
20199   if (BasePtr.getBase().isUndef())
20200     return false;
20201 
20202   // Add St's interval.
20203   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
20204 
20205   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
20206     // If the chain has more than one use, then we can't reorder the mem ops.
20207     if (!SDValue(Chain, 0)->hasOneUse())
20208       break;
20209     if (Chain->isVolatile() || Chain->isIndexed())
20210       break;
20211 
20212     // Find the base pointer and offset for this memory node.
20213     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
20214     // Check that the base pointer is the same as the original one.
20215     int64_t Offset;
20216     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
20217       break;
20218     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
20219     // Make sure we don't overlap with other intervals by checking the ones to
20220     // the left or right before inserting.
20221     auto I = Intervals.find(Offset);
20222     // If there's a next interval, we should end before it.
20223     if (I != Intervals.end() && I.start() < (Offset + Length))
20224       break;
20225     // If there's a previous interval, we should start after it.
20226     if (I != Intervals.begin() && (--I).stop() <= Offset)
20227       break;
20228     Intervals.insert(Offset, Offset + Length, Unit);
20229 
20230     ChainedStores.push_back(Chain);
20231     STChain = Chain;
20232   }
20233 
20234   // If we didn't find a chained store, exit.
20235   if (ChainedStores.size() == 0)
20236     return false;
20237 
20238   // Improve all chained stores (St and ChainedStores members) starting from
20239   // where the store chain ended and return single TokenFactor.
20240   SDValue NewChain = STChain->getChain();
20241   SmallVector<SDValue, 8> TFOps;
20242   for (unsigned I = ChainedStores.size(); I;) {
20243     StoreSDNode *S = ChainedStores[--I];
20244     SDValue BetterChain = FindBetterChain(S, NewChain);
20245     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
20246         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
20247     TFOps.push_back(SDValue(S, 0));
20248     ChainedStores[I] = S;
20249   }
20250 
20251   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20252   SDValue BetterChain = FindBetterChain(St, NewChain);
20253   SDValue NewST;
20254   if (St->isTruncatingStore())
20255     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
20256                               St->getBasePtr(), St->getMemoryVT(),
20257                               St->getMemOperand());
20258   else
20259     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
20260                          St->getBasePtr(), St->getMemOperand());
20261 
20262   TFOps.push_back(NewST);
20263 
20264   // If we improved every element of TFOps, then we've lost the dependence on
20265   // NewChain to successors of St and we need to add it back to TFOps. Do so at
20266   // the beginning to keep relative order consistent with FindBetterChains.
20267   auto hasImprovedChain = [&](SDValue ST) -> bool {
20268     return ST->getOperand(0) != NewChain;
20269   };
20270   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
20271   if (AddNewChain)
20272     TFOps.insert(TFOps.begin(), NewChain);
20273 
20274   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
20275   CombineTo(St, TF);
20276 
20277   AddToWorklist(STChain);
20278   // Add TF operands to the worklist in reverse order.
20279   for (auto I = TF->getNumOperands(); I;)
20280     AddToWorklist(TF->getOperand(--I).getNode());
20281   AddToWorklist(TF.getNode());
20282   return true;
20283 }
20284 
20285 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20286   if (OptLevel == CodeGenOpt::None)
20287     return false;
20288 
20289   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20290 
20291   // We must have a base and an offset.
20292   if (!BasePtr.getBase().getNode())
20293     return false;
20294 
20295   // Do not handle stores to undef base pointers.
20296   if (BasePtr.getBase().isUndef())
20297     return false;
20298 
20299   // Directly improve a chain of disjoint stores starting at St.
20300   if (parallelizeChainedStores(St))
20301     return true;
20302 
20303   // Improve St's chain.
20304   SDValue BetterChain = FindBetterChain(St, St->getChain());
20305   if (St->getChain() != BetterChain) {
20306     replaceStoreChain(St, BetterChain);
20307     return true;
20308   }
20309   return false;
20310 }
20311 
20312 /// This is the entry point for the file.
20313 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
20314                            CodeGenOpt::Level OptLevel) {
20315   // This is the main entry point to this class.
20316   DAGCombiner(*this, AA, OptLevel).Run(Level);
20317 }
20318