//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to add to the worklist since we
    /// considered a new worklist entry. Since we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in dependence check. If we have seen
    /// the bail out for the same pair many times over a limit, we won't
    /// consider the StoreNode with the same RootNode as store merging
    /// candidate again.
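    /// For example, with the default limit of 10 (StoreMergeDependenceLimit),
    /// roughly once the dependence check for a given (StoreNode, RootNode)
    /// pair has bailed out that many times, later merge attempts skip the
    /// pair.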
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the worklist because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
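      // (For example, a legal <vscale x 4 x i32> only guarantees a minimum of
      // 128 bits, even though its size at runtime may be larger.)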
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
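    /// For example, for i32 (and (srl x, 24), 0xFF), bit propagation can
    /// prove the mask is redundant (the shift already clears the upper bits)
    /// and replace the AND with (srl x, 24).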
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitAVG(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL, SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
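    // For example, stores of (Constant 42) or (ConstantFP 1.0) classify as
    // StoreSource::Constant, a store of (extract_vector_elt v, i) as
    // StoreSource::Extract, and a store of a loaded value as
    // StoreSource::Load.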
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
                                     SDValue ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that the truncate has a
    /// single use); if they are not met, an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
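    /// For example, before operation legalization this returns true for an
    /// opcode the target marks either Legal or Custom for \p VT; after
    /// legalization it returns true only when the opcode is Legal.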
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function
// zero extends the shorter of the pair so that they match. We provide an
// Offset so that we can create bitwidths that won't overflow.
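// For example, given an i8 LHS and an i16 RHS with Offset == 0, both are
// widened to 16 bits; with Offset == 1 they become 17 bits wide, so that a
// following add of the two values cannot overflow.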
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
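// For example, on a target whose boolean contents are all-ones for true and
// zero for false, (select_cc lhs, rhs, -1, 0, cc) behaves exactly like
// (setcc lhs, rhs, cc), so LHS, RHS, and CC are populated the same way.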
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
// undef's.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if this is an indexed load whose index is not an opaque target
// constant and may therefore be split from the load.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).
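  // For example, on a target where a base-plus-4080 address is legal but
  // base-plus-8160 is not, rewriting (add (add x, 4080), 4080) under a load
  // into (add x, 8160) would push the combined offset out of the reachable
  // immediate range, defeating the earlier split.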

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto *LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
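// For example, (add (add x, C1), C2) becomes (add x, C1+C2) through the
// constant-folding path below, and (xor (xor a, b), b) collapses to a via
// the repeated-operand checks.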
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
        return DAG.getNode(Opc, DL, VT, N00, OpNode);
      return SDValue();
    }
    if (TLI.isReassocProfitable(DAG, N0, N1)) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
        return DAG.getNode(Opc, DL, VT, OpNode, N01);
      return SDValue();
    }
  }

  // Check for repeated operand logic simplifications.
  if (Opc == ISD::AND || Opc == ISD::OR) {
    // (N00 & N01) & N00 --> N00 & N01
    // (N00 & N01) & N01 --> N00 & N01
    // (N00 | N01) | N00 --> N00 | N01
    // (N00 | N01) | N01 --> N00 | N01
    if (N1 == N00 || N1 == N01)
      return N0;
  }
  if (Opc == ISD::XOR) {
    // (N00 ^ N01) ^ N00 --> N01
    if (N1 == N00)
      return N01;
    // (N00 ^ N01) ^ N01 --> N00
    if (N1 == N01)
      return N00;
  }

  return SDValue();
}
1099 
1100 // Try to reassociate commutative binops.
1101 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1102                                     SDValue N1, SDNodeFlags Flags) {
1103   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1104 
1105   // Floating-point reassociation is not allowed without loose FP math.
1106   if (N0.getValueType().isFloatingPoint() ||
1107       N1.getValueType().isFloatingPoint())
1108     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1109       return SDValue();
1110 
1111   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1112     return Combined;
1113   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1114     return Combined;
1115   return SDValue();
1116 }
1117 
1118 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1119                                bool AddTo) {
1120   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1121   ++NodesCombined;
1122   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1123              To[0].dump(&DAG);
1124              dbgs() << " and " << NumTo - 1 << " other values\n");
1125   for (unsigned i = 0, e = NumTo; i != e; ++i)
1126     assert((!To[i].getNode() ||
1127             N->getValueType(i) == To[i].getValueType()) &&
1128            "Cannot combine value to value of different type!");
1129 
1130   WorklistRemover DeadNodes(*this);
1131   DAG.ReplaceAllUsesWith(N, To);
1132   if (AddTo) {
1133     // Push the new nodes and any users onto the worklist
1134     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1135       if (To[i].getNode()) {
1136         AddToWorklist(To[i].getNode());
1137         AddUsersToWorklist(To[i].getNode());
1138       }
1139     }
1140   }
1141 
1142   // Finally, if the node is now dead, remove it from the graph.  The node
1143   // may not be dead if the replacement process recursively simplified to
1144   // something else needing this node.
1145   if (N->use_empty())
1146     deleteAndRecombine(N);
1147   return SDValue(N, 0);
1148 }
1149 
1150 void DAGCombiner::
1151 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1152   // Replace the old value with the new one.
1153   ++NodesCombined;
1154   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1155              dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1156 
1157   // Replace all uses.  If any nodes become isomorphic to other nodes and
1158   // are deleted, make sure to remove them from our worklist.
1159   WorklistRemover DeadNodes(*this);
1160   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1161 
1162   // Push the new node and any (possibly new) users onto the worklist.
1163   AddToWorklistWithUsers(TLO.New.getNode());
1164 
1165   // Finally, if the node is now dead, remove it from the graph.  The node
1166   // may not be dead if the replacement process recursively simplified to
1167   // something else needing this node.
1168   if (TLO.Old->use_empty())
1169     deleteAndRecombine(TLO.Old.getNode());
1170 }
1171 
1172 /// Check the specified integer node value to see if it can be simplified or if
1173 /// things it uses can be simplified by bit propagation. If so, return true.
1174 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1175                                        const APInt &DemandedElts,
1176                                        bool AssumeSingleUse) {
1177   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1178   KnownBits Known;
1179   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1180                                 AssumeSingleUse))
1181     return false;
1182 
1183   // Revisit the node.
1184   AddToWorklist(Op.getNode());
1185 
1186   CommitTargetLoweringOpt(TLO);
1187   return true;
1188 }
1189 
1190 /// Check the specified vector node value to see if it can be simplified or
1191 /// if things it uses can be simplified as it only uses some of the elements.
1192 /// If so, return true.
1193 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1194                                              const APInt &DemandedElts,
1195                                              bool AssumeSingleUse) {
1196   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1197   APInt KnownUndef, KnownZero;
1198   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1199                                       TLO, 0, AssumeSingleUse))
1200     return false;
1201 
1202   // Revisit the node.
1203   AddToWorklist(Op.getNode());
1204 
1205   CommitTargetLoweringOpt(TLO);
1206   return true;
1207 }
1208 
1209 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1210   SDLoc DL(Load);
1211   EVT VT = Load->getValueType(0);
1212   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1213 
1214   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1215              Trunc.dump(&DAG); dbgs() << '\n');
1216   WorklistRemover DeadNodes(*this);
1217   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1218   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1219   deleteAndRecombine(Load);
1220   AddToWorklist(Trunc.getNode());
1221 }
1222 
1223 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1224   Replace = false;
1225   SDLoc DL(Op);
1226   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1227     LoadSDNode *LD = cast<LoadSDNode>(Op);
1228     EVT MemVT = LD->getMemoryVT();
1229     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1230                                                       : LD->getExtensionType();
1231     Replace = true;
1232     return DAG.getExtLoad(ExtType, DL, PVT,
1233                           LD->getChain(), LD->getBasePtr(),
1234                           MemVT, LD->getMemOperand());
1235   }
1236 
1237   unsigned Opc = Op.getOpcode();
1238   switch (Opc) {
1239   default: break;
1240   case ISD::AssertSext:
1241     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1242       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1243     break;
1244   case ISD::AssertZext:
1245     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1246       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1247     break;
1248   case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target about whether it is a good idea to promote this
  // operation and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need additional
    // replacements if there are additional uses.
    // Note: We are checking uses of the *nodes* (SDNode) rather than values
    //       (SDValue) here because the node may reference multiple values
    //       (for example, the chain value of a load node).
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target about whether it is a good idea to promote this
  // operation and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
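    // The shifted value must be extended to match the shift kind: SRA needs
    // the sign bit replicated (sext), SRL needs zeros above the old width
    // (zext), and SHL can use any extension since only the low bits of the
    // input affect the truncated result.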
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult the target about whether it is a good idea to promote this
  // operation and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult the target about whether it is a good idea to promote this
  // operation and what type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalDAG = Level >= AfterLegalizeDAG;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;
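  // Each flag records which legalization phases have already run at this
  // combine level, so the visit routines know which types and operations
  // must remain legal.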

  WorklistInserter AddNodes(*this);

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes) that adds a
  // reference to the root node, preventing it from being deleted, and
  // tracking any changes to the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While we have a valid worklist entry node, try to combine it.
  while (SDNode *N = getNextWorklistEntry()) {
    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (LegalDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes)
        AddToWorklistWithUsers(LN);

      if (!NIsValid)
        continue;
    }

    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of this node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));

    if (N->getNumValues() == RV->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      DAG.ReplaceAllUsesWith(N, &RV);
    }

    // Push the new node and any users onto the worklist.  Omit this if the
    // new node is the EntryToken (e.g. if a store managed to get optimized
    // out), because re-visiting the EntryToken and its users will not uncover
    // any additional opportunities, but there may be a large number of such
    // users, potentially causing compile time explosion.
    if (RV.getOpcode() != ISD::EntryToken) {
      AddToWorklist(RV.getNode());
      AddUsersToWorklist(RV.getNode());
    }

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::SADDSAT:
  case ISD::UADDSAT:            return visitADDSAT(N);
  case ISD::SSUBSAT:
  case ISD::USUBSAT:            return visitSUBSAT(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SADDO:
  case ISD::UADDO:              return visitADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::SSUBO:
  case ISD::USUBO:              return visitSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:           return visitAVG(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:
  case ISD::UMULO:              return visitMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::FSHL:
  case ISD::FSHR:               return visitFunnelShift(N);
  case ISD::SSHLSAT:
  case ISD::USHLSAT:            return visitSHLSAT(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::AssertAlign:        return visitAssertAlign(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::FPOW:               return visitFPOW(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:           return visitFMinMax(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  case ISD::FREEZE:             return visitFREEZE(N);
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
    return visitVPOp(N);
  }
  return SDValue();
}

SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV;
  if (!DisableGenericCombines)
    RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If still nothing happened, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try to eliminate it if the commuted
  // version is already present in the DAG.
  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  // Don't simplify token factors if optnone.
  if (OptLevel == CodeGenOpt::None)
    return SDValue();

  // Don't simplify the token factor if the node itself has too many operands.
  if (N->getNumOperands() > TokenFactorInlineLimit)
    return SDValue();

  // If the sole user is a token factor, we should make sure we have a
  // chance to merge them together. This prevents TF chains from inhibiting
  // optimizations.
  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
    AddToWorklist(*(N->use_begin()));

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    // Limit number of nodes to inline, to avoid quadratic compile times.
    // We have to add the outstanding Token Factors to Ops, otherwise we might
    // drop Ops from the resulting Token Factors.
    if (Ops.size() > TokenFactorInlineLimit) {
      for (unsigned j = i; j < TFs.size(); j++)
        Ops.emplace_back(TFs[j], 0);
      // Drop unprocessed Token Factors from TFs, so we do not add them to the
      // combiner worklist later.
      TFs.resize(i);
      break;
    }

    SDNode *TF = TFs[i];
    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          Changed = true;
          break;
        }
        LLVM_FALLTHROUGH;

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Re-visit inlined Token Factors, to clean them up in case they have been
  // removed. Skip the first Token Factor, as this is the current node.
  for (unsigned i = 1, e = TFs.size(); i < e; i++)
    AddToWorklist(TFs[i]);

  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand, as no
  // further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as being from the current OpNumber.
    if (SeenOps.contains(Op)) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

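  // Walk the chains breadth-first. The visit count is capped at 1024 nodes
  // to keep compile time bounded on very deep chains.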
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END:
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only the operands that were not reached while walking up
        // another operand's chain; those are covered transitively.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
      } else {
        Result = DAG.getTokenFactor(SDLoc(N), Ops);
      }
    }
    return Result;
  }
  return SDValue();
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorklist(N);
  do {
    // Do as a single replacement to avoid rewalking use lists.
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      Ops.push_back(N->getOperand(i));
    DAG.ReplaceAllUsesWith(N, Ops.data());
  } while (!N->use_empty());
  deleteAndRecombine(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer, else nullptr.
static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}

/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  unsigned AS;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = LD->getMemoryVT();
    AS = LD->getAddressSpace();
  } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getMemoryVT();
    AS = ST->getAddressSpace();
  } else
    return false;

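  // Describe the ADD/SUB as an addressing mode: a base register plus either
  // an immediate offset or a second register (with scale 1), then ask the
  // target whether that mode is legal for this memory access.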
  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    AM.HasBaseReg = true;
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
                                   VT.getTypeForEVT(*DAG.getContext()), AS);
}

/// This inverts a canonicalization in IR that replaces a variable select arm
/// with an identity constant. Codegen improves if we re-use the variable
/// operand rather than load a constant. This can also be converted into a
/// masked vector operation if the target supports it.
static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
                                              bool ShouldCommuteOperands) {
  // Match a select as operand 1. The identity constant that we are looking for
  // is only valid as operand 1 of a non-commutative binop.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (ShouldCommuteOperands)
    std::swap(N0, N1);

  // TODO: Should this apply to scalar select too?
  if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
    return SDValue();

  unsigned Opcode = N->getOpcode();
  EVT VT = N->getValueType(0);
  SDValue Cond = N1.getOperand(0);
  SDValue TVal = N1.getOperand(1);
  SDValue FVal = N1.getOperand(2);

  // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
  // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
  // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
  auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
      switch (Opcode) {
      case ISD::FADD: // X + -0.0 --> X
        return C->isZero() && C->isNegative();
      case ISD::FSUB: // X - 0.0 --> X
        return C->isZero() && !C->isNegative();
      case ISD::FMUL: // X * 1.0 --> X
      case ISD::FDIV: // X / 1.0 --> X
        return C->isExactlyValue(1.0);
      }
    }
    if (ConstantSDNode *C = isConstOrConstSplat(V)) {
      switch (Opcode) {
      case ISD::SUB: // X - 0 --> X
        return C->isZero();
      }
    }
    return false;
  };

  // This transform increases uses of N0, so freeze it to be safe.
  // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
  if (isIdentityConstantForOpcode(Opcode, TVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
  }
  // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
  if (isIdentityConstantForOpcode(Opcode, FVal)) {
    SDValue F0 = DAG.getFreeze(N0);
    SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
    return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
  }

  return SDValue();
}

SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
         "Unexpected binary operator");

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto BinOpcode = BO->getOpcode();
  EVT VT = BO->getValueType(0);
  if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
    if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
      return Sel;

    if (TLI.isCommutativeBinOp(BO->getOpcode()))
      if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
        return Sel;
  }

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  unsigned SelOpNo = 0;
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
    SelOpNo = 1;
    Sel = BO->getOperand(1);
  }

  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(CT))
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(CF))
    return SDValue();

  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non constant operands into select. I.e.:
  // and (select Cond, 0, -1), X --> select Cond, 0, X
  // or X, (select Cond, -1, 0) --> select Cond, -1, X
  bool CanFoldNonConst =
      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
      (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
      (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));

  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
  if (!CanFoldNonConst &&
      !isConstantOrConstantVector(CBO, true) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
  SDLoc DL(Sel);
  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
  if (!CanFoldNonConst && !NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
    return SDValue();

  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
  if (!CanFoldNonConst && !NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
    return SDValue();

  SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
  SelectOp->setFlags(BO->getFlags());
  return SelectOp;
}

static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // Match a constant operand and a zext operand for the math instruction:
  // add Z, C
  // sub C, Z
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
  auto *CN = dyn_cast<ConstantSDNode>(C);
  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();

  // Match the zext operand as a setcc of a boolean.
  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
      Z.getOperand(0).getValueType() != MVT::i1)
    return SDValue();

  // Match the compare as: setcc (X & 1), 0, eq.
  SDValue SetCC = Z.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
      SetCC.getOperand(0).getOpcode() != ISD::AND ||
      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
    return SDValue();

  // We are adding/subtracting a constant and an inverted low bit. Turn that
  // into a subtract/add of the low bit with incremented/decremented constant:
  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
  EVT VT = C.getValueType();
  SDLoc DL(N);
  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
}

/// Try to fold a 'not' of a shifted sign-bit, combined with an add/sub of a
/// constant operand, into a shift and an add with a different constant.
static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
         "Expecting add or sub");

  // We need a constant operand for the add/sub, and the other operand is a
  // logical shift right: add (srl), C or sub C, (srl).
  bool IsAdd = N->getOpcode() == ISD::ADD;
  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
  if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
      ShiftOp.getOpcode() != ISD::SRL)
    return SDValue();

  // The shift must be of a 'not' value.
  SDValue Not = ShiftOp.getOperand(0);
  if (!Not.hasOneUse() || !isBitwiseNot(Not))
    return SDValue();

  // The shift must be moving the sign bit to the least-significant-bit.
  EVT VT = ShiftOp.getValueType();
  SDValue ShAmt = ShiftOp.getOperand(1);
  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
  if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
    return SDValue();

  // Eliminate the 'not' by adjusting the shift and add/sub constant:
  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
  SDLoc DL(N);
  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
  if (SDValue NewC =
          DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
                                     {ConstantOp, DAG.getConstant(1, DL, VT)}))
    return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
  return SDValue();
}

/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // fold (add c1, c2) -> c1+c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::ADD, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
      SDValue Sub =
          DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
      assert(Sub && "Constant folding failed");
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
    }

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      SDValue Add =
          DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
      assert(Add && "Constant folding failed");
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
        isOneOrOneSplat(N1)) {
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
    // equivalent to (add x, c0).
    if (N0.getOpcode() == ISD::OR &&
        isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
                                                    {N1, N0.getOperand(1)}))
        return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
    if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
      return RADD;

    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
    // equivalent to (add x, c).
    auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
      if (N0.getOpcode() == ISD::OR && N0.hasOneUse() &&
          isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
          DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
        return DAG.getNode(ISD::ADD, DL, VT,
                           DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                           N0.getOperand(1));
      }
      return SDValue();
    };
    if (SDValue Add = ReassociateAddOr(N0, N1))
      return Add;
    if (SDValue Add = ReassociateAddOr(N1, N0))
      return Add;
  }
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold ((A-B)+(C-A)) -> (C-B)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(0) == N1.getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N0.getOperand(1));

  // fold ((A-B)+(B-C)) -> (A-C)
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
      N0.getOperand(1) == N1.getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N1.getOperand(1));

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // fold (add (umax X, C), -C) --> (usubsat X, C)
  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
    auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
      return (!Max && !Op) ||
             (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
    };
    if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
                                  /*AllowUndefs*/ true))
      return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
                         N0.getOperand(1));
  }

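  // Try a generic demanded-bits simplification of this node. If anything was
  // simplified, the node may have been updated in place, so return it to
  // signal that a change was made.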
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
      return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                         N0.getOperand(0));

    // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
    if (N0.getOpcode() == ISD::ADD) {
      SDValue A, Xor;

      if (isBitwiseNot(N0.getOperand(0))) {
        A = N0.getOperand(1);
        Xor = N0.getOperand(0);
      } else if (isBitwiseNot(N0.getOperand(1))) {
        A = N0.getOperand(0);
        Xor = N0.getOperand(1);
      }

      if (Xor)
        return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
    }

    // Look for:
    //   add (add x, y), 1
    // And if the target does not like this form then turn into:
    //   sub y, (xor x, -1)
    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
        N0.getOpcode() == ISD::ADD) {
      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                                DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
    }
  }

  // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

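  // Try the remaining ADD-like folds with both operand orders.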
  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
    const APInt &C0 = N0->getConstantOperandAPInt(0);
    const APInt &C1 = N1->getConstantOperandAPInt(0);
    return DAG.getVScale(DL, VT, C0 + C1);
  }

  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
  if ((N0.getOpcode() == ISD::ADD) &&
      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
      (N1.getOpcode() == ISD::VSCALE)) {
    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    const APInt &VS1 = N1->getConstantOperandAPInt(0);
    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
  }

  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2617   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2618       N1.getOpcode() == ISD::STEP_VECTOR) {
2619     const APInt &C0 = N0->getConstantOperandAPInt(0);
2620     const APInt &C1 = N1->getConstantOperandAPInt(0);
2621     APInt NewStep = C0 + C1;
2622     return DAG.getStepVector(DL, VT, NewStep);
2623   }
2624 
2625   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2626   if ((N0.getOpcode() == ISD::ADD) &&
2627       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2628       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2629     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2630     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2631     APInt NewStep = SV0 + SV1;
2632     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2633     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2634   }
2635 
2636   return SDValue();
2637 }
2638 
2639 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2640   unsigned Opcode = N->getOpcode();
2641   SDValue N0 = N->getOperand(0);
2642   SDValue N1 = N->getOperand(1);
2643   EVT VT = N0.getValueType();
2644   SDLoc DL(N);
2645 
2646   // fold (add_sat x, undef) -> -1
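  // For any x the undef operand can be chosen as ~x (i.e. -1 - x), giving an
  // all-ones sum without saturating, so all-ones is always a valid result.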
2647   if (N0.isUndef() || N1.isUndef())
2648     return DAG.getAllOnesConstant(DL, VT);
2649 
2650   // fold (add_sat c1, c2) -> c3
2651   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
2652     return C;
2653 
2654   // canonicalize constant to RHS
2655   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2656       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2657     return DAG.getNode(Opcode, DL, VT, N1, N0);
2658 
2659   // fold vector ops
2660   if (VT.isVector()) {
2661     // TODO SimplifyVBinOp
2662 
2663     // fold (add_sat x, 0) -> x, vector edition
2664     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2665       return N0;
2666   }
2667 
2668   // fold (add_sat x, 0) -> x
2669   if (isNullConstant(N1))
2670     return N0;
2671 
2672   // If it cannot overflow, transform into an add.
2673   if (Opcode == ISD::UADDSAT)
2674     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2675       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2676 
2677   return SDValue();
2678 }
2679 
2680 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2681   bool Masked = false;
2682 
2683   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2684   while (true) {
2685     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2686       V = V.getOperand(0);
2687       continue;
2688     }
2689 
2690     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2691       Masked = true;
2692       V = V.getOperand(0);
2693       continue;
2694     }
2695 
2696     break;
2697   }
2698 
2699   // If this is not a carry, return.
2700   if (V.getResNo() != 1)
2701     return SDValue();
2702 
2703   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2704       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2705     return SDValue();
2706 
2707   EVT VT = V->getValueType(0);
2708   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2709     return SDValue();
2710 
  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool is represented
  // as either 0 or 1 and not some other value.
2714   if (Masked ||
2715       TLI.getBooleanContents(V.getValueType()) ==
2716           TargetLoweringBase::ZeroOrOneBooleanContent)
2717     return V;
2718 
2719   return SDValue();
2720 }
2721 
2722 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2723 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2724 /// the opcode and bypass the mask operation.
2725 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2726                                  SelectionDAG &DAG, const SDLoc &DL) {
2727   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2728     return SDValue();
2729 
2730   EVT VT = N0.getValueType();
2731   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2732     return SDValue();
2733 
2734   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2735   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2736   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2737 }
2738 
2739 /// Helper for doing combines based on N0 and N1 being added to each other.
2740 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
2742   EVT VT = N0.getValueType();
2743   SDLoc DL(LocReference);
2744 
2745   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
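  // Shifting a negation left is the same as negating the shifted value:
  // (0 - y) << n == 0 - (y << n) modulo 2^bitwidth.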
2746   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2747       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2748     return DAG.getNode(ISD::SUB, DL, VT, N0,
2749                        DAG.getNode(ISD::SHL, DL, VT,
2750                                    N1.getOperand(0).getOperand(1),
2751                                    N1.getOperand(1)));
2752 
2753   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2754     return V;
2755 
2756   // Look for:
2757   //   add (add x, 1), y
2758   // And if the target does not like this form then turn into:
2759   //   sub y, (xor x, -1)
2760   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2761       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2762     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2763                               DAG.getAllOnesConstant(DL, VT));
2764     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2765   }
2766 
2767   // Hoist one-use subtraction by non-opaque constant:
2768   //   (x - C) + y  ->  (x + y) - C
2769   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2770   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2771       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2772     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2773     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2774   }
2775   // Hoist one-use subtraction from non-opaque constant:
2776   //   (C - x) + y  ->  (y - x) + C
2777   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2778       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2779     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2780     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2781   }
2782 
2783   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2784   // rather than 'add 0/-1' (the zext should get folded).
2785   // add (sext i1 Y), X --> sub X, (zext i1 Y)
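  // sext i1 Y is 0 or -1 and zext i1 Y is 0 or 1, so adding the former is
  // the same as subtracting the latter.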
2786   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2787       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2788       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2789     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2790     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2791   }
2792 
2793   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2794   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2795     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2796     if (TN->getVT() == MVT::i1) {
2797       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2798                                  DAG.getConstant(1, DL, VT));
2799       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2800     }
2801   }
2802 
2803   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2804   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2805       N1.getResNo() == 0)
2806     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2807                        N0, N1.getOperand(0), N1.getOperand(2));
2808 
2809   // (add X, Carry) -> (addcarry X, 0, Carry)
2810   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2811     if (SDValue Carry = getAsCarry(TLI, N1))
2812       return DAG.getNode(ISD::ADDCARRY, DL,
2813                          DAG.getVTList(VT, Carry.getValueType()), N0,
2814                          DAG.getConstant(0, DL, VT), Carry);
2815 
2816   return SDValue();
2817 }
2818 
2819 SDValue DAGCombiner::visitADDC(SDNode *N) {
2820   SDValue N0 = N->getOperand(0);
2821   SDValue N1 = N->getOperand(1);
2822   EVT VT = N0.getValueType();
2823   SDLoc DL(N);
2824 
2825   // If the flag result is dead, turn this into an ADD.
2826   if (!N->hasAnyUseOfValue(1))
2827     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2828                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2829 
2830   // canonicalize constant to RHS.
2831   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2832   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2833   if (N0C && !N1C)
2834     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2835 
2836   // fold (addc x, 0) -> x + no carry out
2837   if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2840 
2841   // If it cannot overflow, transform into an add.
2842   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2843     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2844                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2845 
2846   return SDValue();
2847 }
2848 
2849 /**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2851  * then the flip also occurs if computing the inverse is the same cost.
2852  * This function returns an empty SDValue in case it cannot flip the boolean
2853  * without increasing the cost of the computation. If you want to flip a boolean
2854  * no matter what, use DAG.getLogicalNOT.
2855  */
2856 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2857                                   const TargetLowering &TLI,
2858                                   bool Force) {
2859   if (Force && isa<ConstantSDNode>(V))
2860     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2861 
2862   if (V.getOpcode() != ISD::XOR)
2863     return SDValue();
2864 
2865   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2866   if (!Const)
2867     return SDValue();
2868 
2869   EVT VT = V.getValueType();
2870 
2871   bool IsFlip = false;
  switch (TLI.getBooleanContents(VT)) {
2873     case TargetLowering::ZeroOrOneBooleanContent:
2874       IsFlip = Const->isOne();
2875       break;
2876     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2877       IsFlip = Const->isAllOnes();
2878       break;
2879     case TargetLowering::UndefinedBooleanContent:
2880       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2881       break;
2882   }
2883 
2884   if (IsFlip)
2885     return V.getOperand(0);
2886   if (Force)
2887     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2888   return SDValue();
2889 }
2890 
2891 SDValue DAGCombiner::visitADDO(SDNode *N) {
2892   SDValue N0 = N->getOperand(0);
2893   SDValue N1 = N->getOperand(1);
2894   EVT VT = N0.getValueType();
2895   bool IsSigned = (ISD::SADDO == N->getOpcode());
2896 
2897   EVT CarryVT = N->getValueType(1);
2898   SDLoc DL(N);
2899 
2900   // If the flag result is dead, turn this into an ADD.
2901   if (!N->hasAnyUseOfValue(1))
2902     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2903                      DAG.getUNDEF(CarryVT));
2904 
2905   // canonicalize constant to RHS.
2906   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2907       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2908     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2909 
2910   // fold (addo x, 0) -> x + no carry out
2911   if (isNullOrNullSplat(N1))
2912     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2913 
2914   if (!IsSigned) {
2915     // If it cannot overflow, transform into an add.
2916     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2917       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2918                        DAG.getConstant(0, DL, CarryVT));
2919 
    // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
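    // This holds because ~a + 1 == 0 - a; the add produces a carry exactly
    // when a == 0, while the sub borrows exactly when a != 0, hence the
    // flipped carry.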
2921     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2922       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2923                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2924       return CombineTo(
2925           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2926     }
2927 
2928     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2929       return Combined;
2930 
2931     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2932       return Combined;
2933   }
2934 
2935   return SDValue();
2936 }
2937 
2938 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2939   EVT VT = N0.getValueType();
2940   if (VT.isVector())
2941     return SDValue();
2942 
2943   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2944   // If Y + 1 cannot overflow.
2945   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2946     SDValue Y = N1.getOperand(0);
2947     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2948     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2949       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2950                          N1.getOperand(2));
2951   }
2952 
2953   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2954   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2955     if (SDValue Carry = getAsCarry(TLI, N1))
2956       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2957                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2958 
2959   return SDValue();
2960 }
2961 
2962 SDValue DAGCombiner::visitADDE(SDNode *N) {
2963   SDValue N0 = N->getOperand(0);
2964   SDValue N1 = N->getOperand(1);
2965   SDValue CarryIn = N->getOperand(2);
2966 
2967   // canonicalize constant to RHS
2968   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2969   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2970   if (N0C && !N1C)
2971     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2972                        N1, N0, CarryIn);
2973 
2974   // fold (adde x, y, false) -> (addc x, y)
2975   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2976     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2977 
2978   return SDValue();
2979 }
2980 
2981 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2982   SDValue N0 = N->getOperand(0);
2983   SDValue N1 = N->getOperand(1);
2984   SDValue CarryIn = N->getOperand(2);
2985   SDLoc DL(N);
2986 
2987   // canonicalize constant to RHS
2988   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2989   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2990   if (N0C && !N1C)
2991     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2992 
2993   // fold (addcarry x, y, false) -> (uaddo x, y)
2994   if (isNullConstant(CarryIn)) {
2995     if (!LegalOperations ||
2996         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2997       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2998   }
2999 
3000   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
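  // The sum 0 + 0 + X is just the carry-in bit itself, and a single bit can
  // never produce a carry out of the full-width addition.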
3001   if (isNullConstant(N0) && isNullConstant(N1)) {
3002     EVT VT = N0.getValueType();
3003     EVT CarryVT = CarryIn.getValueType();
3004     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3005     AddToWorklist(CarryExt.getNode());
3006     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3007                                     DAG.getConstant(1, DL, VT)),
3008                      DAG.getConstant(0, DL, CarryVT));
3009   }
3010 
3011   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
3012     return Combined;
3013 
3014   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
3015     return Combined;
3016 
3017   return SDValue();
3018 }
3019 
3020 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3021   SDValue N0 = N->getOperand(0);
3022   SDValue N1 = N->getOperand(1);
3023   SDValue CarryIn = N->getOperand(2);
3024   SDLoc DL(N);
3025 
3026   // canonicalize constant to RHS
3027   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3028   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3029   if (N0C && !N1C)
3030     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3031 
3032   // fold (saddo_carry x, y, false) -> (saddo x, y)
3033   if (isNullConstant(CarryIn)) {
3034     if (!LegalOperations ||
3035         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3036       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3037   }
3038 
3039   return SDValue();
3040 }
3041 
3042 /**
 * If we are facing some sort of diamond carry propagation pattern, try to
3044  * break it up to generate something like:
3045  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
3046  *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
3049  *
3050  * Patterns typically look something like
3051  *            (uaddo A, B)
3052  *             /       \
3053  *          Carry      Sum
3054  *            |          \
3055  *            | (addcarry *, 0, Z)
3056  *            |       /
3057  *             \   Carry
3058  *              |   /
3059  * (addcarry X, *, *)
3060  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3062  * produce a combine with a single path for carry propagation.
3063  */
3064 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3065                                       SDValue X, SDValue Carry0, SDValue Carry1,
3066                                       SDNode *N) {
3067   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3068     return SDValue();
3069   if (Carry1.getOpcode() != ISD::UADDO)
3070     return SDValue();
3071 
3072   SDValue Z;
3073 
3074   /**
3075    * First look for a suitable Z. It will present itself in the form of
3076    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3077    */
3078   if (Carry0.getOpcode() == ISD::ADDCARRY &&
3079       isNullConstant(Carry0.getOperand(1))) {
3080     Z = Carry0.getOperand(2);
3081   } else if (Carry0.getOpcode() == ISD::UADDO &&
3082              isOneConstant(Carry0.getOperand(1))) {
3083     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3084     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3085   } else {
3086     // We couldn't find a suitable Z.
3087     return SDValue();
3088   }
3089 
  auto cancelDiamond = [&](SDValue A, SDValue B) {
3092     SDLoc DL(N);
3093     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3094     Combiner.AddToWorklist(NewY.getNode());
3095     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3096                        DAG.getConstant(0, DL, X.getValueType()),
3097                        NewY.getValue(1));
3098   };
3099 
3100   /**
3101    *      (uaddo A, B)
3102    *           |
3103    *          Sum
3104    *           |
3105    * (addcarry *, 0, Z)
3106    */
3107   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3108     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3109   }
3110 
3111   /**
3112    * (addcarry A, 0, Z)
3113    *         |
3114    *        Sum
3115    *         |
3116    *  (uaddo *, B)
3117    */
3118   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3119     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3120   }
3121 
3122   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3123     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3124   }
3125 
3126   return SDValue();
3127 }
3128 
// If we are facing some sort of diamond carry/borrow in/out pattern, try to
// match patterns like:
3131 //
3132 //          (uaddo A, B)            CarryIn
3133 //            |  \                     |
3134 //            |   \                    |
3135 //    PartialSum   PartialCarryOutX   /
3136 //            |        |             /
3137 //            |    ____|____________/
3138 //            |   /    |
3139 //     (uaddo *, *)    \________
3140 //       |  \                   \
3141 //       |   \                   |
3142 //       |    PartialCarryOutY   |
3143 //       |        \              |
3144 //       |         \            /
3145 //   AddCarrySum    |    ______/
3146 //                  |   /
3147 //   CarryOut = (or *, *)
3148 //
3149 // And generate ADDCARRY (or SUBCARRY) with two result values:
3150 //
3151 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3152 //
3153 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3154 // a single path for carry/borrow out propagation:
3155 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3156                                    SDValue N0, SDValue N1, SDNode *N) {
3157   SDValue Carry0 = getAsCarry(TLI, N0);
3158   if (!Carry0)
3159     return SDValue();
3160   SDValue Carry1 = getAsCarry(TLI, N1);
3161   if (!Carry1)
3162     return SDValue();
3163 
3164   unsigned Opcode = Carry0.getOpcode();
3165   if (Opcode != Carry1.getOpcode())
3166     return SDValue();
3167   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3168     return SDValue();
3169 
3170   // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3171   // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3172   if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3173     std::swap(Carry0, Carry1);
3174 
3175   // Check if nodes are connected in expected way.
3176   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3177       Carry1.getOperand(1) != Carry0.getValue(0))
3178     return SDValue();
3179 
  // The carry-in value must be on the right-hand side for subtraction.
3181   unsigned CarryInOperandNum =
3182       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3183   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3184     return SDValue();
3185   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3186 
3187   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3188   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3189     return SDValue();
3190 
3191   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3192   // TODO: make getAsCarry() aware of how partial carries are merged.
3193   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3194     return SDValue();
3195   CarryIn = CarryIn.getOperand(0);
3196   if (CarryIn.getValueType() != MVT::i1)
3197     return SDValue();
3198 
3199   SDLoc DL(N);
3200   SDValue Merged =
3201       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3202                   Carry0.getOperand(1), CarryIn);
3203 
  // Because we have proven that the result of the UADDO/USUBO of A and B
  // feeds into the UADDO/USUBO that consumes the carry/borrow in, it follows
  // that if the first UADDO/USUBO overflows, the second one cannot. For
  // example, consider 8-bit numbers where 0xFF is the maximum value.
3209   //
3210   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3211   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3212   //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags, and that AND can return a constant zero.
3215   //
3216   // TODO: match other operations that can merge flags (ADD, etc)
3217   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3218   if (N->getOpcode() == ISD::AND)
3219     return DAG.getConstant(0, DL, MVT::i1);
3220   return Merged.getValue(1);
3221 }
3222 
3223 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3224                                        SDNode *N) {
3225   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
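  // This holds because ~a + b + c == b - a - 1 + c == b - a - (1 - c), which
  // is exactly the subtraction with the inverted borrow-in.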
3226   if (isBitwiseNot(N0))
3227     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3228       SDLoc DL(N);
3229       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3230                                 N0.getOperand(0), NotC);
3231       return CombineTo(
3232           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3233     }
3234 
3235   // Iff the flag result is dead:
3236   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3237   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3238   // or the dependency between the instructions.
3239   if ((N0.getOpcode() == ISD::ADD ||
3240        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3241         N0.getValue(1) != CarryIn)) &&
3242       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3243     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3244                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3245 
3246   /**
3247    * When one of the addcarry argument is itself a carry, we may be facing
3248    * a diamond carry propagation. In which case we try to transform the DAG
3249    * to ensure linear carry propagation if that is possible.
3250    */
3251   if (auto Y = getAsCarry(TLI, N1)) {
3252     // Because both are carries, Y and Z can be swapped.
3253     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3254       return R;
3255     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3256       return R;
3257   }
3258 
3259   return SDValue();
3260 }
3261 
3262 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3263 // clamp/truncation if necessary.
3264 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3265                                    SDValue RHS, SelectionDAG &DAG,
3266                                    const SDLoc &DL) {
3267   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3268          "Illegal truncation");
3269 
3270   if (DstVT == SrcVT)
3271     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3272 
3273   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3274   // clamping RHS.
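  // For example, with SrcVT == i16, DstVT == i8 and LHS < 256: whenever
  // RHS >= 255 the saturating difference is 0 either way, and below that the
  // subtraction is unaffected, so clamping RHS to 255 is safe.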
3275   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3276                                           DstVT.getScalarSizeInBits());
3277   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3278     return SDValue();
3279 
3280   SDValue SatLimit =
3281       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3282                                            DstVT.getScalarSizeInBits()),
3283                       DL, SrcVT);
3284   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3285   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3286   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3287   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3288 }
3289 
3290 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3291 // usubsat(a,b), optionally as a truncated type.
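// umax(a,b) - b is a - b when a > b and 0 otherwise, which is exactly
// usubsat(a,b); likewise a - umin(a,b).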
3292 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3293   if (N->getOpcode() != ISD::SUB ||
3294       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3295     return SDValue();
3296 
3297   EVT SubVT = N->getValueType(0);
3298   SDValue Op0 = N->getOperand(0);
3299   SDValue Op1 = N->getOperand(1);
3300 
  // Try to find umax(a,b) - b or a - umin(a,b) patterns
  // that may be converted to usubsat(a,b).
3303   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3304     SDValue MaxLHS = Op0.getOperand(0);
3305     SDValue MaxRHS = Op0.getOperand(1);
3306     if (MaxLHS == Op1)
3307       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3308     if (MaxRHS == Op1)
3309       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3310   }
3311 
3312   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3313     SDValue MinLHS = Op1.getOperand(0);
3314     SDValue MinRHS = Op1.getOperand(1);
3315     if (MinLHS == Op0)
3316       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3317     if (MinRHS == Op0)
3318       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3319   }
3320 
3321   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3322   if (Op1.getOpcode() == ISD::TRUNCATE &&
3323       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3324       Op1.getOperand(0).hasOneUse()) {
3325     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3326     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3327     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3328       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3329                                  DAG, SDLoc(N));
3330     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3331       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3332                                  DAG, SDLoc(N));
3333   }
3334 
3335   return SDValue();
3336 }
3337 
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
3340 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3341                              SelectionDAG &DAG, bool LegalOperations) {
3342   if (!VT.isVector())
3343     return DAG.getConstant(0, DL, VT);
3344   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3345     return DAG.getConstant(0, DL, VT);
3346   return SDValue();
3347 }
3348 
3349 SDValue DAGCombiner::visitSUB(SDNode *N) {
3350   SDValue N0 = N->getOperand(0);
3351   SDValue N1 = N->getOperand(1);
3352   EVT VT = N0.getValueType();
3353   SDLoc DL(N);
3354 
3355   auto PeekThroughFreeze = [](SDValue N) {
3356     if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3357       return N->getOperand(0);
3358     return N;
3359   };
3360 
3361   // fold (sub x, x) -> 0
3362   // FIXME: Refactor this and xor and other similar operations together.
3363   if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3364     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3365 
3366   // fold (sub c1, c2) -> c3
3367   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3368     return C;
3369 
3370   // fold vector ops
3371   if (VT.isVector()) {
3372     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3373       return FoldedVOp;
3374 
3375     // fold (sub x, 0) -> x, vector edition
3376     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3377       return N0;
3378   }
3379 
3380   if (SDValue NewSel = foldBinOpIntoSelect(N))
3381     return NewSel;
3382 
3383   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3384 
3385   // fold (sub x, c) -> (add x, -c)
3386   if (N1C) {
3387     return DAG.getNode(ISD::ADD, DL, VT, N0,
3388                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3389   }
3390 
3391   if (isNullOrNullSplat(N0)) {
3392     unsigned BitWidth = VT.getScalarSizeInBits();
3393     // Right-shifting everything out but the sign bit followed by negation is
3394     // the same as flipping arithmetic/logical shift type without the negation:
3395     // -(X >>u 31) -> (X >>s 31)
3396     // -(X >>s 31) -> (X >>u 31)
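    // X >>u 31 is 0 or 1 and X >>s 31 is 0 or -1, and negating one form
    // yields the other.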
3397     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3398       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3399       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3400         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3401         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3402           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3403       }
3404     }
3405 
3406     // 0 - X --> 0 if the sub is NUW.
3407     if (N->getFlags().hasNoUnsignedWrap())
3408       return N0;
3409 
3410     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3411       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3412       // N1 must be 0 because negating the minimum signed value is undefined.
3413       if (N->getFlags().hasNoSignedWrap())
3414         return N0;
3415 
3416       // 0 - X --> X if X is 0 or the minimum signed value.
3417       return N1;
3418     }
3419 
3420     // Convert 0 - abs(x).
3421     if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3422         !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3423       if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3424         return Result;
3425 
    // Fold neg(splat(neg(x))) -> splat(x)
3427     if (VT.isVector()) {
3428       SDValue N1S = DAG.getSplatValue(N1, true);
3429       if (N1S && N1S.getOpcode() == ISD::SUB &&
3430           isNullConstant(N1S.getOperand(0))) {
3431         if (VT.isScalableVector())
3432           return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
3433         return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
3434       }
3435     }
3436   }
3437 
3438   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3439   if (isAllOnesOrAllOnesSplat(N0))
3440     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3441 
3442   // fold (A - (0-B)) -> A+B
3443   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3444     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3445 
3446   // fold A-(A-B) -> B
3447   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3448     return N1.getOperand(1);
3449 
3450   // fold (A+B)-A -> B
3451   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3452     return N0.getOperand(1);
3453 
3454   // fold (A+B)-B -> A
3455   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3456     return N0.getOperand(0);
3457 
3458   // fold (A+C1)-C2 -> A+(C1-C2)
3459   if (N0.getOpcode() == ISD::ADD &&
3460       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3461       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3462     SDValue NewC =
3463         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3464     assert(NewC && "Constant folding failed");
3465     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3466   }
3467 
3468   // fold C2-(A+C1) -> (C2-C1)-A
3469   if (N1.getOpcode() == ISD::ADD) {
3470     SDValue N11 = N1.getOperand(1);
3471     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3472         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3473       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3474       assert(NewC && "Constant folding failed");
3475       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3476     }
3477   }
3478 
3479   // fold (A-C1)-C2 -> A-(C1+C2)
3480   if (N0.getOpcode() == ISD::SUB &&
3481       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3482       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3483     SDValue NewC =
3484         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3485     assert(NewC && "Constant folding failed");
3486     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3487   }
3488 
3489   // fold (c1-A)-c2 -> (c1-c2)-A
3490   if (N0.getOpcode() == ISD::SUB &&
3491       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3492       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3493     SDValue NewC =
3494         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3495     assert(NewC && "Constant folding failed");
3496     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3497   }
3498 
3499   // fold ((A+(B+or-C))-B) -> A+or-C
3500   if (N0.getOpcode() == ISD::ADD &&
3501       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3502        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3503       N0.getOperand(1).getOperand(0) == N1)
3504     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3505                        N0.getOperand(1).getOperand(1));
3506 
3507   // fold ((A+(C+B))-B) -> A+C
3508   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3509       N0.getOperand(1).getOperand(1) == N1)
3510     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3511                        N0.getOperand(1).getOperand(0));
3512 
3513   // fold ((A-(B-C))-C) -> A-B
3514   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3515       N0.getOperand(1).getOperand(1) == N1)
3516     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3517                        N0.getOperand(1).getOperand(0));
3518 
3519   // fold (A-(B-C)) -> A+(C-B)
3520   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3521     return DAG.getNode(ISD::ADD, DL, VT, N0,
3522                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3523                                    N1.getOperand(0)));
3524 
3525   // A - (A & B)  ->  A & (~B)
3526   if (N1.getOpcode() == ISD::AND) {
3527     SDValue A = N1.getOperand(0);
3528     SDValue B = N1.getOperand(1);
3529     if (A != N0)
3530       std::swap(A, B);
3531     if (A == N0 &&
3532         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3533       SDValue InvB =
3534           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3535       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3536     }
3537   }
3538 
3539   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3540   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3541     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3542         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3543       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3544                                 N1.getOperand(0).getOperand(1),
3545                                 N1.getOperand(1));
3546       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3547     }
3548     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3549         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3550       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3551                                 N1.getOperand(0),
3552                                 N1.getOperand(1).getOperand(1));
3553       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3554     }
3555   }
3556 
3557   // If either operand of a sub is undef, the result is undef
3558   if (N0.isUndef())
3559     return N0;
3560   if (N1.isUndef())
3561     return N1;
3562 
3563   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3564     return V;
3565 
3566   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3567     return V;
3568 
3569   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3570     return V;
3571 
3572   if (SDValue V = foldSubToUSubSat(VT, N))
3573     return V;
3574 
3575   // (x - y) - 1  ->  add (xor y, -1), x
3576   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3577     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3578                               DAG.getAllOnesConstant(DL, VT));
3579     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3580   }
3581 
3582   // Look for:
3583   //   sub y, (xor x, -1)
3584   // And if the target does not like this form then turn into:
3585   //   add (add x, y), 1
3586   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3587     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3588     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3589   }
3590 
3591   // Hoist one-use addition by non-opaque constant:
3592   //   (x + C) - y  ->  (x - y) + C
3593   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3594       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3595     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3596     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3597   }
3598   // y - (x + C)  ->  (y - x) - C
3599   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3600       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3601     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3602     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3603   }
3604   // (x - C) - y  ->  (x - y) - C
3605   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3606   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3607       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3608     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3609     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3610   }
3611   // (C - x) - y  ->  C - (x + y)
3612   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3613       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3614     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3615     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3616   }
3617 
3618   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3619   // rather than 'sub 0/1' (the sext should get folded).
3620   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3621   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3622       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3623       TLI.getBooleanContents(VT) ==
3624           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3625     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3626     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3627   }
3628 
3629   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
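  // Y is 0 when X is non-negative and -1 when X is negative, so (X ^ Y) - Y
  // is X in the first case and (~X) + 1 == -X in the second, i.e. abs(X).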
3630   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3631     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3632       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3633       SDValue S0 = N1.getOperand(0);
3634       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3635         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3636           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3637             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3638     }
3639   }
3640 
3641   // If the relocation model supports it, consider symbol offsets.
3642   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3643     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3644       // fold (sub Sym, c) -> Sym-c
3645       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3646         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3647                                     GA->getOffset() -
3648                                         (uint64_t)N1C->getSExtValue());
3649       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3650       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3651         if (GA->getGlobal() == GB->getGlobal())
3652           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3653                                  DL, VT);
3654     }
3655 
3656   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3657   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3658     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3659     if (TN->getVT() == MVT::i1) {
3660       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3661                                  DAG.getConstant(1, DL, VT));
3662       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3663     }
3664   }
3665 
3666   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3667   if (N1.getOpcode() == ISD::VSCALE) {
3668     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3669     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3670   }
3671 
3672   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
3673   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
3674     APInt NewStep = -N1.getConstantOperandAPInt(0);
3675     return DAG.getNode(ISD::ADD, DL, VT, N0,
3676                        DAG.getStepVector(DL, VT, NewStep));
3677   }
3678 
3679   // Prefer an add for more folding potential and possibly better codegen:
3680   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
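  // The logical shift yields 0 or 1 and the arithmetic shift yields 0 or -1,
  // so subtracting the former equals adding the latter.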
3681   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3682     SDValue ShAmt = N1.getOperand(1);
3683     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3684     if (ShAmtC &&
3685         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3686       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3687       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3688     }
3689   }
3690 
3691   // As with the previous fold, prefer add for more folding potential.
3692   // Subtracting SMIN/0 is the same as adding SMIN/0:
3693   // N0 - (X << BW-1) --> N0 + (X << BW-1)
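  // X << (BW-1) is either 0 or the sign-bit value 2^(BW-1), and both are
  // their own negation modulo 2^BW.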
3694   if (N1.getOpcode() == ISD::SHL) {
3695     ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
3696     if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
3697       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
3698   }
3699 
3700   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3701     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3702     if (SDValue Carry = getAsCarry(TLI, N0)) {
3703       SDValue X = N1;
3704       SDValue Zero = DAG.getConstant(0, DL, VT);
3705       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3706       return DAG.getNode(ISD::ADDCARRY, DL,
3707                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3708                          Carry);
3709     }
3710   }
3711 
3712   return SDValue();
3713 }
3714 
3715 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3716   SDValue N0 = N->getOperand(0);
3717   SDValue N1 = N->getOperand(1);
3718   EVT VT = N0.getValueType();
3719   SDLoc DL(N);
3720 
3721   // fold (sub_sat x, undef) -> 0
3722   if (N0.isUndef() || N1.isUndef())
3723     return DAG.getConstant(0, DL, VT);
3724 
3725   // fold (sub_sat x, x) -> 0
3726   if (N0 == N1)
3727     return DAG.getConstant(0, DL, VT);
3728 
3729   // fold (sub_sat c1, c2) -> c3
3730   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3731     return C;
3732 
3733   // fold vector ops
3734   if (VT.isVector()) {
3735     // TODO SimplifyVBinOp
3736 
3737     // fold (sub_sat x, 0) -> x, vector edition
3738     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3739       return N0;
3740   }
3741 
3742   // fold (sub_sat x, 0) -> x
3743   if (isNullConstant(N1))
3744     return N0;
3745 
3746   return SDValue();
3747 }
3748 
3749 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3750   SDValue N0 = N->getOperand(0);
3751   SDValue N1 = N->getOperand(1);
3752   EVT VT = N0.getValueType();
3753   SDLoc DL(N);
3754 
3755   // If the flag result is dead, turn this into an SUB.
3756   if (!N->hasAnyUseOfValue(1))
3757     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3758                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3759 
3760   // fold (subc x, x) -> 0 + no borrow
3761   if (N0 == N1)
3762     return CombineTo(N, DAG.getConstant(0, DL, VT),
3763                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3764 
3765   // fold (subc x, 0) -> x + no borrow
3766   if (isNullConstant(N1))
3767     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3768 
3769   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3770   if (isAllOnesConstant(N0))
3771     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3772                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3773 
3774   return SDValue();
3775 }
3776 
3777 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3778   SDValue N0 = N->getOperand(0);
3779   SDValue N1 = N->getOperand(1);
3780   EVT VT = N0.getValueType();
3781   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3782 
3783   EVT CarryVT = N->getValueType(1);
3784   SDLoc DL(N);
3785 
3786   // If the flag result is dead, turn this into an SUB.
3787   if (!N->hasAnyUseOfValue(1))
3788     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3789                      DAG.getUNDEF(CarryVT));
3790 
3791   // fold (subo x, x) -> 0 + no borrow
3792   if (N0 == N1)
3793     return CombineTo(N, DAG.getConstant(0, DL, VT),
3794                      DAG.getConstant(0, DL, CarryVT));
3795 
3796   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3797 
  // fold (subo x, c) -> (addo x, -c)
3799   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3800     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3801                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3802   }
3803 
3804   // fold (subo x, 0) -> x + no borrow
3805   if (isNullOrNullSplat(N1))
3806     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3807 
3808   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3809   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3810     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3811                      DAG.getConstant(0, DL, CarryVT));
3812 
3813   return SDValue();
3814 }
3815 
3816 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3817   SDValue N0 = N->getOperand(0);
3818   SDValue N1 = N->getOperand(1);
3819   SDValue CarryIn = N->getOperand(2);
3820 
3821   // fold (sube x, y, false) -> (subc x, y)
3822   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3823     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3824 
3825   return SDValue();
3826 }
3827 
3828 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3829   SDValue N0 = N->getOperand(0);
3830   SDValue N1 = N->getOperand(1);
3831   SDValue CarryIn = N->getOperand(2);
3832 
3833   // fold (subcarry x, y, false) -> (usubo x, y)
3834   if (isNullConstant(CarryIn)) {
3835     if (!LegalOperations ||
3836         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3837       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3838   }
3839 
3840   return SDValue();
3841 }
3842 
3843 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3844   SDValue N0 = N->getOperand(0);
3845   SDValue N1 = N->getOperand(1);
3846   SDValue CarryIn = N->getOperand(2);
3847 
3848   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3849   if (isNullConstant(CarryIn)) {
3850     if (!LegalOperations ||
3851         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3852       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3853   }
3854 
3855   return SDValue();
3856 }
3857 
3858 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3859 // UMULFIXSAT here.
3860 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3861   SDValue N0 = N->getOperand(0);
3862   SDValue N1 = N->getOperand(1);
3863   SDValue Scale = N->getOperand(2);
3864   EVT VT = N0.getValueType();
3865 
3866   // fold (mulfix x, undef, scale) -> 0
3867   if (N0.isUndef() || N1.isUndef())
3868     return DAG.getConstant(0, SDLoc(N), VT);
3869 
3870   // Canonicalize constant to RHS (vector doesn't have to splat)
3871   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3872      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3873     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3874 
3875   // fold (mulfix x, 0, scale) -> 0
3876   if (isNullConstant(N1))
3877     return DAG.getConstant(0, SDLoc(N), VT);
3878 
3879   return SDValue();
3880 }
3881 
3882 SDValue DAGCombiner::visitMUL(SDNode *N) {
3883   SDValue N0 = N->getOperand(0);
3884   SDValue N1 = N->getOperand(1);
3885   EVT VT = N0.getValueType();
3886 
3887   // fold (mul x, undef) -> 0
3888   if (N0.isUndef() || N1.isUndef())
3889     return DAG.getConstant(0, SDLoc(N), VT);
3890 
3891   // fold (mul c1, c2) -> c1*c2
3892   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3893     return C;
3894 
3895   // canonicalize constant to RHS (vector doesn't have to splat)
3896   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3897       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3898     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3899 
3900   bool N1IsConst = false;
3901   bool N1IsOpaqueConst = false;
3902   APInt ConstValue1;
3903 
3904   // fold vector ops
3905   if (VT.isVector()) {
3906     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
3907       return FoldedVOp;
3908 
3909     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3910     assert((!N1IsConst ||
3911             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3912            "Splat APInt should be element width");
3913   } else {
3914     N1IsConst = isa<ConstantSDNode>(N1);
3915     if (N1IsConst) {
3916       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3917       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3918     }
3919   }
3920 
3921   // fold (mul x, 0) -> 0
3922   if (N1IsConst && ConstValue1.isZero())
3923     return N1;
3924 
3925   // fold (mul x, 1) -> x
3926   if (N1IsConst && ConstValue1.isOne())
3927     return N0;
3928 
3929   if (SDValue NewSel = foldBinOpIntoSelect(N))
3930     return NewSel;
3931 
3932   // fold (mul x, -1) -> 0-x
3933   if (N1IsConst && ConstValue1.isAllOnes()) {
3934     SDLoc DL(N);
3935     return DAG.getNode(ISD::SUB, DL, VT,
3936                        DAG.getConstant(0, DL, VT), N0);
3937   }
3938 
3939   // fold (mul x, (1 << c)) -> x << c
3940   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3941       DAG.isKnownToBeAPowerOfTwo(N1) &&
3942       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3943     SDLoc DL(N);
3944     SDValue LogBase2 = BuildLogBase2(N1, DL);
3945     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3946     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3947     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3948   }
3949 
3950   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3951   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
3952     unsigned Log2Val = (-ConstValue1).logBase2();
3953     SDLoc DL(N);
3954     // FIXME: If the input is something that is easily negated (e.g. a
3955     // single-use add), we should put the negate there.
3956     return DAG.getNode(ISD::SUB, DL, VT,
3957                        DAG.getConstant(0, DL, VT),
3958                        DAG.getNode(ISD::SHL, DL, VT, N0,
3959                             DAG.getConstant(Log2Val, DL,
3960                                       getShiftAmountTy(N0.getValueType()))));
3961   }
3962 
3963   // Try to transform:
3964   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3965   // mul x, (2^N + 1) --> add (shl x, N), x
3966   // mul x, (2^N - 1) --> sub (shl x, N), x
3967   // Examples: x * 33 --> (x << 5) + x
3968   //           x * 15 --> (x << 4) - x
3969   //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces to x - (x << 4)
3971   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3972   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3973   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3974   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3975   //           x * 0xf800 --> (x << 16) - (x << 11)
3976   //           x * -0x8800 --> -((x << 15) + (x << 11))
3977   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3978   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3979     // TODO: We could handle more general decomposition of any constant by
3980     //       having the target set a limit on number of ops and making a
3981     //       callback to determine that sequence (similar to sqrt expansion).
3982     unsigned MathOp = ISD::DELETED_NODE;
3983     APInt MulC = ConstValue1.abs();
3984     // The constant `2` should be treated as (2^0 + 1).
3985     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3986     MulC.lshrInPlace(TZeros);
3987     if ((MulC - 1).isPowerOf2())
3988       MathOp = ISD::ADD;
3989     else if ((MulC + 1).isPowerOf2())
3990       MathOp = ISD::SUB;
3991 
3992     if (MathOp != ISD::DELETED_NODE) {
3993       unsigned ShAmt =
3994           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3995       ShAmt += TZeros;
3996       assert(ShAmt < VT.getScalarSizeInBits() &&
3997              "multiply-by-constant generated out of bounds shift");
3998       SDLoc DL(N);
3999       SDValue Shl =
4000           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4001       SDValue R =
4002           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4003                                DAG.getNode(ISD::SHL, DL, VT, N0,
4004                                            DAG.getConstant(TZeros, DL, VT)))
4005                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4006       if (ConstValue1.isNegative())
4007         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
4008       return R;
4009     }
4010   }
4011 
4012   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4013   if (N0.getOpcode() == ISD::SHL &&
4014       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
4015       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
4016     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
4017     if (isConstantOrConstantVector(C3))
4018       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
4019   }
4020 
4021   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4022   // use.
4023   {
4024     SDValue Sh, Y;
4025 
4026     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
4027     if (N0.getOpcode() == ISD::SHL &&
4028         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4029       Sh = N0; Y = N1;
4030     } else if (N1.getOpcode() == ISD::SHL &&
4031                isConstantOrConstantVector(N1.getOperand(1)) &&
4032                N1->hasOneUse()) {
4033       Sh = N1; Y = N0;
4034     }
4035 
4036     if (Sh.getNode()) {
4037       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
4038       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
4039     }
4040   }
4041 
4042   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
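  // e.g. (mul (add x, 4), 3) --> (add (mul x, 3), 12)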
4043   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4044       N0.getOpcode() == ISD::ADD &&
4045       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4046       isMulAddWithConstProfitable(N, N0, N1))
4047       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
4048                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
4049                                      N0.getOperand(0), N1),
4050                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
4051                                      N0.getOperand(1), N1));
4052 
4053   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4054   if (N0.getOpcode() == ISD::VSCALE)
4055     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4056       const APInt &C0 = N0.getConstantOperandAPInt(0);
4057       const APInt &C1 = NC1->getAPIntValue();
4058       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
4059     }
4060 
4061   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4062   APInt MulVal;
4063   if (N0.getOpcode() == ISD::STEP_VECTOR)
4064     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4065       const APInt &C0 = N0.getConstantOperandAPInt(0);
4066       APInt NewStep = C0 * MulVal;
4067       return DAG.getStepVector(SDLoc(N), VT, NewStep);
4068     }
4069 
  // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x per element, i.e.
  // -> and(x, mask)
  // We can replace vectors with '0' and '1' factors with a clearing mask.
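  // e.g. (mul x, <1, 0, 1, undef>) --> (and x, <-1, 0, -1, 0>)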
4074   if (VT.isFixedLengthVector()) {
4075     unsigned NumElts = VT.getVectorNumElements();
4076     SmallBitVector ClearMask;
4077     ClearMask.reserve(NumElts);
4078     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4079       if (!V || V->isZero()) {
4080         ClearMask.push_back(true);
4081         return true;
4082       }
4083       ClearMask.push_back(false);
4084       return V->isOne();
4085     };
4086     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4087         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4088       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4089       SDLoc DL(N);
4090       EVT LegalSVT = N1.getOperand(0).getValueType();
4091       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4092       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4093       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4094       for (unsigned I = 0; I != NumElts; ++I)
4095         if (ClearMask[I])
4096           Mask[I] = Zero;
4097       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4098     }
4099   }
4100 
4101   // reassociate mul
4102   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
4103     return RMUL;
4104 
4105   return SDValue();
4106 }
4107 
4108 /// Return true if divmod libcall is available.
4109 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4110                                      const TargetLowering &TLI) {
4111   RTLIB::Libcall LC;
4112   EVT NodeType = Node->getValueType(0);
4113   if (!NodeType.isSimple())
4114     return false;
4115   switch (NodeType.getSimpleVT().SimpleTy) {
  default: return false; // No libcall available for this type.
4117   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4118   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4119   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4120   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4121   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4122   }
4123 
4124   return TLI.getLibcallName(LC) != nullptr;
4125 }
4126 
4127 /// Issue divrem if both quotient and remainder are needed.
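/// For example, if both (sdiv X, Y) and (srem X, Y) exist, form a single
/// (sdivrem X, Y) node and rewire each user to the matching result value.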
4128 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4129   if (Node->use_empty())
4130     return SDValue(); // This is a dead node, leave it alone.
4131 
4132   unsigned Opcode = Node->getOpcode();
4133   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4134   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4135 
  // DIVREM can still be lowered for non-legal types by using a libcall.
4137   EVT VT = Node->getValueType(0);
4138   if (VT.isVector() || !VT.isInteger())
4139     return SDValue();
4140 
4141   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4142     return SDValue();
4143 
4144   // If DIVREM is going to get expanded into a libcall,
4145   // but there is no libcall available, then don't combine.
4146   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4147       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4148     return SDValue();
4149 
4150   // If div is legal, it's better to do the normal expansion
4151   unsigned OtherOpcode = 0;
4152   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4153     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4154     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4155       return SDValue();
4156   } else {
4157     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4158     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4159       return SDValue();
4160   }
4161 
4162   SDValue Op0 = Node->getOperand(0);
4163   SDValue Op1 = Node->getOperand(1);
4164   SDValue combined;
4165   for (SDNode *User : Op0->uses()) {
4166     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4167         User->use_empty())
4168       continue;
4169     // Convert the other matching node(s), too;
4170     // otherwise, the DIVREM may get target-legalized into something
4171     // target-specific that we won't be able to recognize.
4172     unsigned UserOpc = User->getOpcode();
4173     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4174         User->getOperand(0) == Op0 &&
4175         User->getOperand(1) == Op1) {
4176       if (!combined) {
4177         if (UserOpc == OtherOpcode) {
4178           SDVTList VTs = DAG.getVTList(VT, VT);
4179           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4180         } else if (UserOpc == DivRemOpc) {
4181           combined = SDValue(User, 0);
4182         } else {
4183           assert(UserOpc == Opcode);
4184           continue;
4185         }
4186       }
4187       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4188         CombineTo(User, combined);
4189       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4190         CombineTo(User, combined.getValue(1));
4191     }
4192   }
4193   return combined;
4194 }
4195 
4196 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4197   SDValue N0 = N->getOperand(0);
4198   SDValue N1 = N->getOperand(1);
4199   EVT VT = N->getValueType(0);
4200   SDLoc DL(N);
4201 
4202   unsigned Opc = N->getOpcode();
4203   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4204   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4205 
4206   // X / undef -> undef
4207   // X % undef -> undef
4208   // X / 0 -> undef
4209   // X % 0 -> undef
4210   // NOTE: This includes vectors where any divisor element is zero/undef.
4211   if (DAG.isUndef(Opc, {N0, N1}))
4212     return DAG.getUNDEF(VT);
4213 
4214   // undef / X -> 0
4215   // undef % X -> 0
4216   if (N0.isUndef())
4217     return DAG.getConstant(0, DL, VT);
4218 
4219   // 0 / X -> 0
4220   // 0 % X -> 0
4221   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4222   if (N0C && N0C->isZero())
4223     return N0;
4224 
4225   // X / X -> 1
4226   // X % X -> 0
4227   if (N0 == N1)
4228     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4229 
4230   // X / 1 -> X
4231   // X % 1 -> 0
4232   // If this is a boolean op (single-bit element type), we can't have
4233   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4234   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4235   // it's a 1.
4236   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4237     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4238 
4239   return SDValue();
4240 }
4241 
4242 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4243   SDValue N0 = N->getOperand(0);
4244   SDValue N1 = N->getOperand(1);
4245   EVT VT = N->getValueType(0);
4246   EVT CCVT = getSetCCResultType(VT);
4247   SDLoc DL(N);
4248 
4249   // fold (sdiv c1, c2) -> c1/c2
4250   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4251     return C;
4252 
4253   // fold vector ops
4254   if (VT.isVector())
4255     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4256       return FoldedVOp;
4257 
4258   // fold (sdiv X, -1) -> 0-X
4259   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4260   if (N1C && N1C->isAllOnes())
4261     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4262 
4263   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4264   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4265     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4266                          DAG.getConstant(1, DL, VT),
4267                          DAG.getConstant(0, DL, VT));
4268 
4269   if (SDValue V = simplifyDivRem(N, DAG))
4270     return V;
4271 
4272   if (SDValue NewSel = foldBinOpIntoSelect(N))
4273     return NewSel;
4274 
4275   // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4277   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4278     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4279 
4280   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4281     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4283     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4284                                               { N0, N1 })) {
4285       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4286       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4287       AddToWorklist(Mul.getNode());
4288       AddToWorklist(Sub.getNode());
4289       CombineTo(RemNode, Sub);
4290     }
4291     return V;
4292   }
4293 
4294   // sdiv, srem -> sdivrem
4295   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4296   // true.  Otherwise, we break the simplification logic in visitREM().
4297   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4298   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4299     if (SDValue DivRem = useDivRem(N))
4300         return DivRem;
4301 
4302   return SDValue();
4303 }
4304 
4305 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4306   SDLoc DL(N);
4307   EVT VT = N->getValueType(0);
4308   EVT CCVT = getSetCCResultType(VT);
4309   unsigned BitWidth = VT.getScalarSizeInBits();
4310 
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements.
4313   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4314     if (C->isZero() || C->isOpaque())
4315       return false;
4316     if (C->getAPIntValue().isPowerOf2())
4317       return true;
4318     if (C->getAPIntValue().isNegatedPowerOf2())
4319       return true;
4320     return false;
4321   };
4322 
4323   // fold (sdiv X, pow2) -> simple ops after legalize
4324   // FIXME: We check for the exact bit here because the generic lowering gives
4325   // better results in that case. The target-specific lowering should learn how
4326   // to handle exact sdivs efficiently.
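  // e.g. (sdiv X, 8) for i32 becomes: Sign = X >>s 31; Bias = Sign >>u 29
  // (7 if X < 0, else 0); result = (X + Bias) >>s 3, rounding toward zero.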
4327   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4328     // Target-specific implementation of sdiv x, pow2.
4329     if (SDValue Res = BuildSDIVPow2(N))
4330       return Res;
4331 
4332     // Create constants that are functions of the shift amount value.
4333     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4334     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4335     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4336     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4337     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4338     if (!isConstantOrConstantVector(Inexact))
4339       return SDValue();
4340 
4341     // Splat the sign bit into the register
4342     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4343                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4344     AddToWorklist(Sign.getNode());
4345 
    // Add (N0 < 0) ? abs(pow2) - 1 : 0, so that the arithmetic shift below
    // rounds toward zero.
4347     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4348     AddToWorklist(Srl.getNode());
4349     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4350     AddToWorklist(Add.getNode());
4351     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4352     AddToWorklist(Sra.getNode());
4353 
4354     // Special case: (sdiv X, 1) -> X
4355     // Special Case: (sdiv X, -1) -> 0-X
4356     SDValue One = DAG.getConstant(1, DL, VT);
4357     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4358     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4359     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4360     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4361     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4362 
4363     // If dividing by a positive value, we're done. Otherwise, the result must
4364     // be negated.
4365     SDValue Zero = DAG.getConstant(0, DL, VT);
4366     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4367 
4368     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4369     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4370     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4371     return Res;
4372   }
4373 
4374   // If integer divide is expensive and we satisfy the requirements, emit an
4375   // alternate sequence.  Targets may check function attributes for size/speed
4376   // trade-offs.
4377   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4378   if (isConstantOrConstantVector(N1) &&
4379       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4380     if (SDValue Op = BuildSDIV(N))
4381       return Op;
4382 
4383   return SDValue();
4384 }
4385 
4386 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4387   SDValue N0 = N->getOperand(0);
4388   SDValue N1 = N->getOperand(1);
4389   EVT VT = N->getValueType(0);
4390   EVT CCVT = getSetCCResultType(VT);
4391   SDLoc DL(N);
4392 
4393   // fold (udiv c1, c2) -> c1/c2
4394   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4395     return C;
4396 
4397   // fold vector ops
4398   if (VT.isVector())
4399     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4400       return FoldedVOp;
4401 
4402   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4403   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4404   if (N1C && N1C->isAllOnes())
4405     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4406                          DAG.getConstant(1, DL, VT),
4407                          DAG.getConstant(0, DL, VT));
4408 
4409   if (SDValue V = simplifyDivRem(N, DAG))
4410     return V;
4411 
4412   if (SDValue NewSel = foldBinOpIntoSelect(N))
4413     return NewSel;
4414 
4415   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4416     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4418     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4419                                               { N0, N1 })) {
4420       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4421       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4422       AddToWorklist(Mul.getNode());
4423       AddToWorklist(Sub.getNode());
4424       CombineTo(RemNode, Sub);
4425     }
4426     return V;
4427   }
4428 
  // udiv, urem -> udivrem
4430   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4431   // true.  Otherwise, we break the simplification logic in visitREM().
4432   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4433   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4434     if (SDValue DivRem = useDivRem(N))
4435         return DivRem;
4436 
4437   return SDValue();
4438 }
4439 
4440 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4441   SDLoc DL(N);
4442   EVT VT = N->getValueType(0);
4443 
4444   // fold (udiv x, (1 << c)) -> x >>u c
4445   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4446       DAG.isKnownToBeAPowerOfTwo(N1)) {
4447     SDValue LogBase2 = BuildLogBase2(N1, DL);
4448     AddToWorklist(LogBase2.getNode());
4449 
4450     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4451     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4452     AddToWorklist(Trunc.getNode());
4453     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4454   }
4455 
4456   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
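  // e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))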
4457   if (N1.getOpcode() == ISD::SHL) {
4458     SDValue N10 = N1.getOperand(0);
4459     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4460         DAG.isKnownToBeAPowerOfTwo(N10)) {
4461       SDValue LogBase2 = BuildLogBase2(N10, DL);
4462       AddToWorklist(LogBase2.getNode());
4463 
4464       EVT ADDVT = N1.getOperand(1).getValueType();
4465       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4466       AddToWorklist(Trunc.getNode());
4467       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4468       AddToWorklist(Add.getNode());
4469       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4470     }
4471   }
4472 
4473   // fold (udiv x, c) -> alternate
4474   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4475   if (isConstantOrConstantVector(N1) &&
4476       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4477     if (SDValue Op = BuildUDIV(N))
4478       return Op;
4479 
4480   return SDValue();
4481 }
4482 
4483 // handles ISD::SREM and ISD::UREM
4484 SDValue DAGCombiner::visitREM(SDNode *N) {
4485   unsigned Opcode = N->getOpcode();
4486   SDValue N0 = N->getOperand(0);
4487   SDValue N1 = N->getOperand(1);
4488   EVT VT = N->getValueType(0);
4489   EVT CCVT = getSetCCResultType(VT);
4490 
4491   bool isSigned = (Opcode == ISD::SREM);
4492   SDLoc DL(N);
4493 
4494   // fold (rem c1, c2) -> c1%c2
4495   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4496   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4497     return C;
4498 
  // fold (urem X, -1) -> select(X == -1, 0, X)
4500   if (!isSigned && N1C && N1C->isAllOnes())
4501     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4502                          DAG.getConstant(0, DL, VT), N0);
4503 
4504   if (SDValue V = simplifyDivRem(N, DAG))
4505     return V;
4506 
4507   if (SDValue NewSel = foldBinOpIntoSelect(N))
4508     return NewSel;
4509 
4510   if (isSigned) {
4511     // If we know the sign bits of both operands are zero, strength reduce to a
4512     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4513     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4514       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4515   } else {
4516     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4517       // fold (urem x, pow2) -> (and x, pow2-1)
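      // e.g. (urem x, 16) --> (and x, 15)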
4518       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4519       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4520       AddToWorklist(Add.getNode());
4521       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4522     }
4523     if (N1.getOpcode() == ISD::SHL &&
4524         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4525       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4526       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4527       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4528       AddToWorklist(Add.getNode());
4529       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4530     }
4531   }
4532 
4533   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4534 
4535   // If X/C can be simplified by the division-by-constant logic, lower
4536   // X%C to the equivalent of X-X/C*C.
4537   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4538   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4539   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4540   // combine will not return a DIVREM.  Regardless, checking cheapness here
4541   // makes sense since the simplification results in fatter code.
4542   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4543     SDValue OptimizedDiv =
4544         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4545     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4546       // If the equivalent Div node also exists, update its users.
4547       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4548       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4549                                                 { N0, N1 }))
4550         CombineTo(DivNode, OptimizedDiv);
4551       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4552       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4553       AddToWorklist(OptimizedDiv.getNode());
4554       AddToWorklist(Mul.getNode());
4555       return Sub;
4556     }
4557   }
4558 
  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
4560   if (SDValue DivRem = useDivRem(N))
4561     return DivRem.getValue(1);
4562 
4563   return SDValue();
4564 }
4565 
4566 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4567   SDValue N0 = N->getOperand(0);
4568   SDValue N1 = N->getOperand(1);
4569   EVT VT = N->getValueType(0);
4570   SDLoc DL(N);
4571 
4572   // fold (mulhs c1, c2)
4573   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4574     return C;
4575 
4576   // canonicalize constant to RHS.
4577   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4578       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4579     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4580 
4581   if (VT.isVector()) {
4582     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4583       return FoldedVOp;
4584 
4585     // fold (mulhs x, 0) -> 0
    // Do not return N1, because it may contain undef elements.
4587     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4588       return DAG.getConstant(0, DL, VT);
4589   }
4590 
4591   // fold (mulhs x, 0) -> 0
4592   if (isNullConstant(N1))
4593     return N1;
4594 
4595   // fold (mulhs x, 1) -> (sra x, size(x)-1)
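  // The high half of (sext x) * 1 is a copy of the sign bit of x.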
4596   if (isOneConstant(N1))
4597     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4598                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4599                                        getShiftAmountTy(N0.getValueType())));
4600 
4601   // fold (mulhs x, undef) -> 0
4602   if (N0.isUndef() || N1.isUndef())
4603     return DAG.getConstant(0, DL, VT);
4604 
4605   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4606   // plus a shift.
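  // e.g. for i16 when i32 MUL is legal: sign-extend both operands to i32,
  // multiply, shift the product right by 16, and truncate back to i16.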
4607   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4608       !VT.isVector()) {
4609     MVT Simple = VT.getSimpleVT();
4610     unsigned SimpleSize = Simple.getSizeInBits();
4611     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4612     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4613       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4614       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4615       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4616       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4617             DAG.getConstant(SimpleSize, DL,
4618                             getShiftAmountTy(N1.getValueType())));
4619       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4620     }
4621   }
4622 
4623   return SDValue();
4624 }
4625 
4626 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4627   SDValue N0 = N->getOperand(0);
4628   SDValue N1 = N->getOperand(1);
4629   EVT VT = N->getValueType(0);
4630   SDLoc DL(N);
4631 
4632   // fold (mulhu c1, c2)
4633   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4634     return C;
4635 
4636   // canonicalize constant to RHS.
4637   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4638       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4639     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4640 
4641   if (VT.isVector()) {
4642     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4643       return FoldedVOp;
4644 
4645     // fold (mulhu x, 0) -> 0
    // Do not return N1, because it may contain undef elements.
4647     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4648       return DAG.getConstant(0, DL, VT);
4649   }
4650 
4651   // fold (mulhu x, 0) -> 0
4652   if (isNullConstant(N1))
4653     return N1;
4654 
4655   // fold (mulhu x, 1) -> 0
4656   if (isOneConstant(N1))
4657     return DAG.getConstant(0, DL, N0.getValueType());
4658 
4659   // fold (mulhu x, undef) -> 0
4660   if (N0.isUndef() || N1.isUndef())
4661     return DAG.getConstant(0, DL, VT);
4662 
4663   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
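  // e.g. for i32: (mulhu x, 16) --> (srl x, 28)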
4664   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4665       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4666     unsigned NumEltBits = VT.getScalarSizeInBits();
4667     SDValue LogBase2 = BuildLogBase2(N1, DL);
4668     SDValue SRLAmt = DAG.getNode(
4669         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4670     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4671     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4672     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4673   }
4674 
4675   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4676   // plus a shift.
4677   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4678       !VT.isVector()) {
4679     MVT Simple = VT.getSimpleVT();
4680     unsigned SimpleSize = Simple.getSizeInBits();
4681     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4682     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4683       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4684       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4685       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4686       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4687             DAG.getConstant(SimpleSize, DL,
4688                             getShiftAmountTy(N1.getValueType())));
4689       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4690     }
4691   }
4692 
4693   // Simplify the operands using demanded-bits information.
4694   // We don't have demanded bits support for MULHU so this just enables constant
4695   // folding based on known bits.
4696   if (SimplifyDemandedBits(SDValue(N, 0)))
4697     return SDValue(N, 0);
4698 
4699   return SDValue();
4700 }
4701 
4702 SDValue DAGCombiner::visitAVG(SDNode *N) {
4703   unsigned Opcode = N->getOpcode();
4704   SDValue N0 = N->getOperand(0);
4705   SDValue N1 = N->getOperand(1);
4706   EVT VT = N->getValueType(0);
4707   SDLoc DL(N);
4708 
4709   // fold (avg c1, c2)
4710   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4711     return C;
4712 
4713   // canonicalize constant to RHS.
4714   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4715       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4716     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4717 
4718   if (VT.isVector()) {
4719     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4720       return FoldedVOp;
4721 
4722     // fold (avgfloor x, 0) -> x >> 1
4723     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
4724       if (Opcode == ISD::AVGFLOORS)
4725         return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
4726       if (Opcode == ISD::AVGFLOORU)
4727         return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
4728     }
4729   }
4730 
4731   // fold (avg x, undef) -> x
4732   if (N0.isUndef())
4733     return N1;
4734   if (N1.isUndef())
4735     return N0;
4736 
  // TODO: If we use avg for scalars anywhere, we can add
  // (avgfloor x, 0) -> x >> 1 there too.
4738 
4739   return SDValue();
4740 }
4741 
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return the combined value if a simplification was made.
4745 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4746                                                 unsigned HiOp) {
4747   // If the high half is not needed, just compute the low half.
4748   bool HiExists = N->hasAnyUseOfValue(1);
4749   if (!HiExists && (!LegalOperations ||
4750                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4751     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4752     return CombineTo(N, Res, Res);
4753   }
4754 
4755   // If the low half is not needed, just compute the high half.
4756   bool LoExists = N->hasAnyUseOfValue(0);
4757   if (!LoExists && (!LegalOperations ||
4758                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4759     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4760     return CombineTo(N, Res, Res);
4761   }
4762 
4763   // If both halves are used, return as it is.
4764   if (LoExists && HiExists)
4765     return SDValue();
4766 
4767   // If the two computed results can be simplified separately, separate them.
4768   if (LoExists) {
4769     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4770     AddToWorklist(Lo.getNode());
4771     SDValue LoOpt = combine(Lo.getNode());
4772     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4773         (!LegalOperations ||
4774          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4775       return CombineTo(N, LoOpt, LoOpt);
4776   }
4777 
4778   if (HiExists) {
4779     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4780     AddToWorklist(Hi.getNode());
4781     SDValue HiOpt = combine(Hi.getNode());
4782     if (HiOpt.getNode() && HiOpt != Hi &&
4783         (!LegalOperations ||
4784          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4785       return CombineTo(N, HiOpt, HiOpt);
4786   }
4787 
4788   return SDValue();
4789 }
4790 
4791 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4792   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4793     return Res;
4794 
4795   EVT VT = N->getValueType(0);
4796   SDLoc DL(N);
4797 
  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
4800   if (VT.isSimple() && !VT.isVector()) {
4801     MVT Simple = VT.getSimpleVT();
4802     unsigned SimpleSize = Simple.getSizeInBits();
4803     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4804     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4805       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4806       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4807       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1).
4809       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4810             DAG.getConstant(SimpleSize, DL,
4811                             getShiftAmountTy(Lo.getValueType())));
4812       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0).
4814       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4815       return CombineTo(N, Lo, Hi);
4816     }
4817   }
4818 
4819   return SDValue();
4820 }
4821 
4822 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4823   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4824     return Res;
4825 
4826   EVT VT = N->getValueType(0);
4827   SDLoc DL(N);
4828 
4829   // (umul_lohi N0, 0) -> (0, 0)
4830   if (isNullConstant(N->getOperand(1))) {
4831     SDValue Zero = DAG.getConstant(0, DL, VT);
4832     return CombineTo(N, Zero, Zero);
4833   }
4834 
4835   // (umul_lohi N0, 1) -> (N0, 0)
4836   if (isOneConstant(N->getOperand(1))) {
4837     SDValue Zero = DAG.getConstant(0, DL, VT);
4838     return CombineTo(N, N->getOperand(0), Zero);
4839   }
4840 
  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
4843   if (VT.isSimple() && !VT.isVector()) {
4844     MVT Simple = VT.getSimpleVT();
4845     unsigned SimpleSize = Simple.getSizeInBits();
4846     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4847     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4848       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4849       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4850       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1).
4852       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4853             DAG.getConstant(SimpleSize, DL,
4854                             getShiftAmountTy(Lo.getValueType())));
4855       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0).
4857       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4858       return CombineTo(N, Lo, Hi);
4859     }
4860   }
4861 
4862   return SDValue();
4863 }
4864 
4865 SDValue DAGCombiner::visitMULO(SDNode *N) {
4866   SDValue N0 = N->getOperand(0);
4867   SDValue N1 = N->getOperand(1);
4868   EVT VT = N0.getValueType();
4869   bool IsSigned = (ISD::SMULO == N->getOpcode());
4870 
4871   EVT CarryVT = N->getValueType(1);
4872   SDLoc DL(N);
4873 
4874   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4875   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4876 
4877   // fold operation with constant operands.
4878   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4879   // multiple results.
4880   if (N0C && N1C) {
4881     bool Overflow;
4882     APInt Result =
4883         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4884                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4885     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4886                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4887   }
4888 
4889   // canonicalize constant to RHS.
4890   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4891       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4892     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4893 
4894   // fold (mulo x, 0) -> 0 + no carry out
4895   if (isNullOrNullSplat(N1))
4896     return CombineTo(N, DAG.getConstant(0, DL, VT),
4897                      DAG.getConstant(0, DL, CarryVT));
4898 
4899   // (mulo x, 2) -> (addo x, x)
4900   if (N1C && N1C->getAPIntValue() == 2)
4901     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4902                        N->getVTList(), N0, N0);
4903 
4904   if (IsSigned) {
    // A 1-bit SMULO overflows if both inputs are 1 (i.e. both are -1).
4906     if (VT.getScalarSizeInBits() == 1) {
4907       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4908       return CombineTo(N, And,
4909                        DAG.getSetCC(DL, CarryVT, And,
4910                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4911     }
4912 
4913     // Multiplying n * m significant bits yields a result of n + m significant
4914     // bits. If the total number of significant bits does not exceed the
4915     // result bit width (minus 1), there is no overflow.
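    // e.g. for i8: if the operands have 5 and 6 sign bits, 5 + 6 = 11 > 9,
    // so the multiply cannot overflow.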
4916     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4917     if (SignBits > 1)
4918       SignBits += DAG.ComputeNumSignBits(N1);
4919     if (SignBits > VT.getScalarSizeInBits() + 1)
4920       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4921                        DAG.getConstant(0, DL, CarryVT));
4922   } else {
4923     KnownBits N1Known = DAG.computeKnownBits(N1);
4924     KnownBits N0Known = DAG.computeKnownBits(N0);
4925     bool Overflow;
4926     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4927     if (!Overflow)
4928       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4929                        DAG.getConstant(0, DL, CarryVT));
4930   }
4931 
4932   return SDValue();
4933 }
4934 
// Function to calculate whether the Min/Max pair of SDNodes (potentially
// swapped around) make a signed saturate pattern, clamping to between a signed
// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and
// 2^BW-1. Returns the node being clamped and the bitwidth of the clamp in BW.
// Should work with both SMIN/SMAX nodes and setcc/select combos. The operands
// are the same as SimplifySelectCC: N0 < N1 ? N2 : N3.
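// e.g. smin(smax(X, -128), 127) clamps X to the signed i8 range (BW = 8,
// Unsigned = false); smin(smax(X, 0), 255) matches the unsigned form
// (BW = 8, Unsigned = true).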
4941 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
4942                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
4943                                   bool &Unsigned) {
4944   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
4945                             ISD::CondCode CC) {
4946     // The compare and select operand should be the same or the select operands
4947     // should be truncated versions of the comparison.
4948     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
4949       return 0;
4950     // The constants need to be the same or a truncated version of each other.
4951     ConstantSDNode *N1C = isConstOrConstSplat(N1);
4952     ConstantSDNode *N3C = isConstOrConstSplat(N3);
4953     if (!N1C || !N3C)
4954       return 0;
4955     const APInt &C1 = N1C->getAPIntValue();
4956     const APInt &C2 = N3C->getAPIntValue();
4957     if (C1.getBitWidth() < C2.getBitWidth() ||
4958         C1 != C2.sextOrSelf(C1.getBitWidth()))
4959       return 0;
4960     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
4961   };
4962 
4963   // Check the initial value is a SMIN/SMAX equivalent.
4964   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
4965   if (!Opcode0)
4966     return SDValue();
4967 
4968   SDValue N00, N01, N02, N03;
4969   ISD::CondCode N0CC;
4970   switch (N0.getOpcode()) {
4971   case ISD::SMIN:
4972   case ISD::SMAX:
4973     N00 = N02 = N0.getOperand(0);
4974     N01 = N03 = N0.getOperand(1);
4975     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
4976     break;
4977   case ISD::SELECT_CC:
4978     N00 = N0.getOperand(0);
4979     N01 = N0.getOperand(1);
4980     N02 = N0.getOperand(2);
4981     N03 = N0.getOperand(3);
4982     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
4983     break;
4984   case ISD::SELECT:
4985   case ISD::VSELECT:
4986     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
4987       return SDValue();
4988     N00 = N0.getOperand(0).getOperand(0);
4989     N01 = N0.getOperand(0).getOperand(1);
4990     N02 = N0.getOperand(1);
4991     N03 = N0.getOperand(2);
4992     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
4993     break;
4994   default:
4995     return SDValue();
4996   }
4997 
4998   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
4999   if (!Opcode1 || Opcode0 == Opcode1)
5000     return SDValue();
5001 
5002   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5003   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5004   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5005     return SDValue();
5006 
5007   const APInt &MinC = MinCOp->getAPIntValue();
5008   const APInt &MaxC = MaxCOp->getAPIntValue();
5009   APInt MinCPlus1 = MinC + 1;
5010   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5011     BW = MinCPlus1.exactLogBase2() + 1;
5012     Unsigned = false;
5013     return N02;
5014   }
5015 
5016   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5017     BW = MinCPlus1.exactLogBase2();
5018     Unsigned = true;
5019     return N02;
5020   }
5021 
5022   return SDValue();
5023 }
5024 
5025 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5026                                            SDValue N3, ISD::CondCode CC,
5027                                            SelectionDAG &DAG) {
5028   unsigned BW;
5029   bool Unsigned;
5030   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
5031   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5032     return SDValue();
5033   EVT FPVT = Fp.getOperand(0).getValueType();
5034   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5035   if (FPVT.isVector())
5036     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5037                              FPVT.getVectorElementCount());
5038   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5039   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5040     return SDValue();
5041   SDLoc DL(Fp);
5042   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5043                             DAG.getValueType(NewVT.getScalarType()));
5044   return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
5045                   : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
5046 }
5047 
5048 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5049                                          SDValue N3, ISD::CondCode CC,
5050                                          SelectionDAG &DAG) {
  // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
  // select/vselect/select_cc. The select operands (N2/N3) may be truncated
  // versions of the setcc operands (N0/N1).
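  // e.g. (umin (fptoui X to i32), 255) --> (zext (fp_to_uint_sat X to i8))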
5054   if ((N0 != N2 &&
5055        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5056       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5057     return SDValue();
5058   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5059   ConstantSDNode *N3C = isConstOrConstSplat(N3);
5060   if (!N1C || !N3C)
5061     return SDValue();
5062   const APInt &C1 = N1C->getAPIntValue();
5063   const APInt &C3 = N3C->getAPIntValue();
5064   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5065       C1 != C3.zextOrSelf(C1.getBitWidth()))
5066     return SDValue();
5067 
5068   unsigned BW = (C1 + 1).exactLogBase2();
5069   EVT FPVT = N0.getOperand(0).getValueType();
5070   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5071   if (FPVT.isVector())
5072     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5073                              FPVT.getVectorElementCount());
5074   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5075                                                         FPVT, NewVT))
5076     return SDValue();
5077 
5078   SDValue Sat =
5079       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5080                   DAG.getValueType(NewVT.getScalarType()));
5081   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5082 }
5083 
5084 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5085   SDValue N0 = N->getOperand(0);
5086   SDValue N1 = N->getOperand(1);
5087   EVT VT = N0.getValueType();
5088   unsigned Opcode = N->getOpcode();
5089   SDLoc DL(N);
5090 
5091   // fold operation with constant operands.
5092   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5093     return C;
5094 
5095   // canonicalize constant to RHS
5096   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5097       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5098     return DAG.getNode(Opcode, DL, VT, N1, N0);
5099 
5100   // fold vector ops
5101   if (VT.isVector())
5102     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5103       return FoldedVOp;
5104 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5106   // Only do this if the current op isn't legal and the flipped is.
5107   if (!TLI.isOperationLegal(Opcode, VT) &&
5108       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5109       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5110     unsigned AltOpcode;
5111     switch (Opcode) {
5112     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5113     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5114     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5115     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5116     default: llvm_unreachable("Unknown MINMAX opcode");
5117     }
5118     if (TLI.isOperationLegal(AltOpcode, VT))
5119       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5120   }
5121 
5122   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5123     if (SDValue S = PerformMinMaxFpToSatCombine(
5124             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5125       return S;
5126   if (Opcode == ISD::UMIN)
5127     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5128       return S;
5129 
5130   // Simplify the operands using demanded-bits information.
5131   if (SimplifyDemandedBits(SDValue(N, 0)))
5132     return SDValue(N, 0);
5133 
5134   return SDValue();
5135 }
5136 
5137 /// If this is a bitwise logic instruction and both operands have the same
5138 /// opcode, try to sink the other opcode after the logic instruction.
5139 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5140   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5141   EVT VT = N0.getValueType();
5142   unsigned LogicOpcode = N->getOpcode();
5143   unsigned HandOpcode = N0.getOpcode();
5144   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5145           LogicOpcode == ISD::XOR) && "Expected logic opcode");
5146   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5147 
5148   // Bail early if none of these transforms apply.
5149   if (N0.getNumOperands() == 0)
5150     return SDValue();
5151 
5152   // FIXME: We should check number of uses of the operands to not increase
5153   //        the instruction count for all transforms.
5154 
5155   // Handle size-changing casts.
5156   SDValue X = N0.getOperand(0);
5157   SDValue Y = N1.getOperand(0);
5158   EVT XVT = X.getValueType();
5159   SDLoc DL(N);
5160   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5161       HandOpcode == ISD::SIGN_EXTEND) {
5162     // If both operands have other uses, this transform would create extra
5163     // instructions without eliminating anything.
5164     if (!N0.hasOneUse() && !N1.hasOneUse())
5165       return SDValue();
5166     // We need matching integer source types.
5167     if (XVT != Y.getValueType())
5168       return SDValue();
5169     // Don't create an illegal op during or after legalization. Don't ever
5170     // create an unsupported vector op.
5171     if ((VT.isVector() || LegalOperations) &&
5172         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5173       return SDValue();
5174     // Avoid infinite looping with PromoteIntBinOp.
5175     // TODO: Should we apply desirable/legal constraints to all opcodes?
5176     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5177         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5178       return SDValue();
5179     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5180     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5181     return DAG.getNode(HandOpcode, DL, VT, Logic);
5182   }
5183 
5184   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5185   if (HandOpcode == ISD::TRUNCATE) {
5186     // If both operands have other uses, this transform would create extra
5187     // instructions without eliminating anything.
5188     if (!N0.hasOneUse() && !N1.hasOneUse())
5189       return SDValue();
5190     // We need matching source types.
5191     if (XVT != Y.getValueType())
5192       return SDValue();
5193     // Don't create an illegal op during or after legalization.
5194     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5195       return SDValue();
5196     // Be extra careful sinking truncate. If it's free, there's no benefit in
5197     // widening a binop. Also, don't create a logic op on an illegal type.
5198     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5199       return SDValue();
5200     if (!TLI.isTypeLegal(XVT))
5201       return SDValue();
5202     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5203     return DAG.getNode(HandOpcode, DL, VT, Logic);
5204   }
5205 
5206   // For binops SHL/SRL/SRA/AND:
5207   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5208   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5209        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5210       N0.getOperand(1) == N1.getOperand(1)) {
5211     // If either operand has other uses, this transform is not an improvement.
5212     if (!N0.hasOneUse() || !N1.hasOneUse())
5213       return SDValue();
5214     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5215     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5216   }
5217 
5218   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5219   if (HandOpcode == ISD::BSWAP) {
5220     // If either operand has other uses, this transform is not an improvement.
5221     if (!N0.hasOneUse() || !N1.hasOneUse())
5222       return SDValue();
5223     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5224     return DAG.getNode(HandOpcode, DL, VT, Logic);
5225   }
5226 
5227   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5228   // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5232   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5233   // on scalars.
5234   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5235        Level <= AfterLegalizeTypes) {
5236     // Input types must be integer and the same.
5237     if (XVT.isInteger() && XVT == Y.getValueType() &&
5238         !(VT.isVector() && TLI.isTypeLegal(VT) &&
5239           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5240       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5241       return DAG.getNode(HandOpcode, DL, VT, Logic);
5242     }
5243   }
5244 
5245   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5246   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5247   // If both shuffles use the same mask, and both shuffle within a single
5248   // vector, then it is worthwhile to move the swizzle after the operation.
5249   // The type-legalizer generates this pattern when loading illegal
5250   // vector types from memory. In many cases this allows additional shuffle
5251   // optimizations.
5252   // There are other cases where moving the shuffle after the xor/and/or
5253   // is profitable even if shuffles don't perform a swizzle.
5254   // If both shuffles use the same mask, and both shuffles have the same first
5255   // or second operand, then it might still be profitable to move the shuffle
5256   // after the xor/and/or operation.
5257   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5258     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5259     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5260     assert(X.getValueType() == Y.getValueType() &&
5261            "Inputs to shuffles are not the same type");
5262 
5263     // Check that both shuffles use the same mask. The masks are known to be of
5264     // the same length because the result vector type is the same.
5265     // Check also that shuffles have only one use to avoid introducing extra
5266     // instructions.
5267     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5268         !SVN0->getMask().equals(SVN1->getMask()))
5269       return SDValue();
5270 
5271     // Don't try to fold this node if it requires introducing a
5272     // build vector of all zeros that might be illegal at this stage.
5273     SDValue ShOp = N0.getOperand(1);
5274     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5275       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5276 
5277     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5278     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5279       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5280                                   N0.getOperand(0), N1.getOperand(0));
5281       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5282     }
5283 
5284     // Don't try to fold this node if it requires introducing a
5285     // build vector of all zeros that might be illegal at this stage.
5286     ShOp = N0.getOperand(0);
5287     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5288       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5289 
5290     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5291     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5292       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5293                                   N1.getOperand(1));
5294       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5295     }
5296   }
5297 
5298   return SDValue();
5299 }
5300 
5301 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5302 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5303                                        const SDLoc &DL) {
5304   SDValue LL, LR, RL, RR, N0CC, N1CC;
5305   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5306       !isSetCCEquivalent(N1, RL, RR, N1CC))
5307     return SDValue();
5308 
5309   assert(N0.getValueType() == N1.getValueType() &&
5310          "Unexpected operand types for bitwise logic op");
5311   assert(LL.getValueType() == LR.getValueType() &&
5312          RL.getValueType() == RR.getValueType() &&
5313          "Unexpected operand types for setcc");
5314 
5315   // If we're here post-legalization or the logic op type is not i1, the logic
5316   // op type must match a setcc result type. Also, all folds require new
5317   // operations on the left and right operands, so those types must match.
5318   EVT VT = N0.getValueType();
5319   EVT OpVT = LL.getValueType();
5320   if (LegalOperations || VT.getScalarType() != MVT::i1)
5321     if (VT != getSetCCResultType(OpVT))
5322       return SDValue();
5323   if (OpVT != RL.getValueType())
5324     return SDValue();
5325 
5326   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5327   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5328   bool IsInteger = OpVT.isInteger();
5329   if (LR == RR && CC0 == CC1 && IsInteger) {
5330     bool IsZero = isNullOrNullSplat(LR);
5331     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5332 
5333     // All bits clear?
5334     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5335     // All sign bits clear?
5336     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5337     // Any bits set?
5338     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5339     // Any sign bits set?
5340     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5341 
5342     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5343     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5344     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5345     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
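    // These hold because (or X, Y) is zero iff both values are zero, and its
    // sign bit is clear iff both sign bits are clear (setgt X, -1 is just a
    // test of the sign bit).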
5346     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5347       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5348       AddToWorklist(Or.getNode());
5349       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5350     }
5351 
5352     // All bits set?
5353     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5354     // All sign bits set?
5355     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5356     // Any bits clear?
5357     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5358     // Any sign bits clear?
5359     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5360 
5361     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5362     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5363     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5364     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5365     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5366       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5367       AddToWorklist(And.getNode());
5368       return DAG.getSetCC(DL, VT, And, LR, CC1);
5369     }
5370   }
5371 
5372   // TODO: What is the 'or' equivalent of this fold?
5373   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
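  // The fold works because adding 1 maps 0 to 1 and -1 to 0, so X + 1 is
  // unsigned-less-than 2 exactly when X was 0 or -1.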
5374   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5375       IsInteger && CC0 == ISD::SETNE &&
5376       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5377        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5378     SDValue One = DAG.getConstant(1, DL, OpVT);
5379     SDValue Two = DAG.getConstant(2, DL, OpVT);
5380     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5381     AddToWorklist(Add.getNode());
5382     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5383   }
5384 
5385   // Try more general transforms if the predicates match and the only user of
5386   // the compares is the 'and' or 'or'.
5387   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5388       N0.hasOneUse() && N1.hasOneUse()) {
5389     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5390     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5391     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5392       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5393       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5394       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5395       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5396       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5397     }
5398 
5399     // Turn compares of constants whose difference is 1 bit into sub+and+setcc.
5400     // TODO - support non-uniform vector amounts.
5401     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5402       // Match a shared variable operand and 2 non-opaque constant operands.
5403       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5404       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5405       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5406         const APInt &CMax =
5407             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5408         const APInt &CMin =
5409             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5410         // The difference of the constants must be a single bit.
5411         if ((CMax - CMin).isPowerOf2()) {
5412           // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5413           // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
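          // e.g. with CMin = 8 and CMax = 12 (difference 4): (X - 8) & ~4 is
          // zero exactly when X is 8 or 12, so a single setcc against zero
          // covers both constants.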
5414           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5415           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5416           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5417           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5418           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5419           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5420           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5421           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5422         }
5423       }
5424     }
5425   }
5426 
5427   // Canonicalize equivalent operands to LL == RL.
5428   if (LL == RR && LR == RL) {
5429     CC1 = ISD::getSetCCSwappedOperands(CC1);
5430     std::swap(RL, RR);
5431   }
5432 
5433   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5434   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5435   if (LL == RL && LR == RR) {
5436     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5437                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5438     if (NewCC != ISD::SETCC_INVALID &&
5439         (!LegalOperations ||
5440          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5441           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5442       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5443   }
5444 
5445   return SDValue();
5446 }
5447 
5448 /// This contains all DAGCombine rules which reduce two values combined by
5449 /// an And operation to a single value. This makes them reusable in the context
5450 /// of visitSELECT(). Rules involving constants are not included as
5451 /// visitSELECT() already handles those cases.
5452 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5453   EVT VT = N1.getValueType();
5454   SDLoc DL(N);
5455 
5456   // fold (and x, undef) -> 0
5457   if (N0.isUndef() || N1.isUndef())
5458     return DAG.getConstant(0, DL, VT);
5459 
5460   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5461     return V;
5462 
5463   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5464   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5465       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5466     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5467       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5468         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
5469         // immediate for an add, but it is legal if its top c2 bits are set,
5470         // transform the ADD so the immediate doesn't need to be materialized
5471         // in a register.
5472         APInt ADDC = ADDI->getAPIntValue();
5473         APInt SRLC = SRLI->getAPIntValue();
5474         if (ADDC.getMinSignedBits() <= 64 &&
5475             SRLC.ult(VT.getSizeInBits()) &&
5476             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5477           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5478                                              SRLC.getZExtValue());
5479           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5480             ADDC |= Mask;
5481             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5482               SDLoc DL0(N0);
5483               SDValue NewAdd =
5484                 DAG.getNode(ISD::ADD, DL0, VT,
5485                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5486               CombineTo(N0.getNode(), NewAdd);
5487               // Return N so it doesn't get rechecked!
5488               return SDValue(N, 0);
5489             }
5490           }
5491         }
5492       }
5493     }
5494   }
5495 
5496   // Reduce bit extract of low half of an integer to the narrower type.
5497   // (and (srl i64:x, K), KMask) ->
5498   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
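  // e.g. for i64 x with K = 8 and KMask = 0xff, the srl and the and can be
  // performed in i32 on the truncated low half of x, and the result
  // zero-extended back to i64.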
5499   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5500     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5501       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5502         unsigned Size = VT.getSizeInBits();
5503         const APInt &AndMask = CAnd->getAPIntValue();
5504         unsigned ShiftBits = CShift->getZExtValue();
5505 
5506         // Bail out, this node will probably disappear anyway.
5507         if (ShiftBits == 0)
5508           return SDValue();
5509 
5510         unsigned MaskBits = AndMask.countTrailingOnes();
5511         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5512 
5513         if (AndMask.isMask() &&
5514             // Required bits must not span the two halves of the integer and
5515             // must fit in the half size type.
5516             (ShiftBits + MaskBits <= Size / 2) &&
5517             TLI.isNarrowingProfitable(VT, HalfVT) &&
5518             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5519             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5520             TLI.isTruncateFree(VT, HalfVT) &&
5521             TLI.isZExtFree(HalfVT, VT)) {
5522           // The isNarrowingProfitable check is to avoid regressions on PPC
5523           // and AArch64, which match a few 64-bit bit insert / bit extract
5524           // patterns on downstream users of this. Those patterns could
5525           // probably be extended to handle extensions mixed in.
5526 
5527           SDLoc SL(N0);
5528           assert(MaskBits <= Size);
5529 
5530           // Extracting the highest bit of the low half.
5531           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5532           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5533                                       N0.getOperand(0));
5534 
5535           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5536           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5537           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5538           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5539           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5540         }
5541       }
5542     }
5543   }
5544 
5545   return SDValue();
5546 }
5547 
5548 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5549                                    EVT LoadResultTy, EVT &ExtVT) {
5550   if (!AndC->getAPIntValue().isMask())
5551     return false;
5552 
5553   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5554 
5555   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5556   EVT LoadedVT = LoadN->getMemoryVT();
5557 
5558   if (ExtVT == LoadedVT &&
5559       (!LegalOperations ||
5560        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5561     // ZEXTLOAD will match without needing to change the size of the value being
5562     // loaded.
5563     return true;
5564   }
5565 
5566   // Do not change the width of a volatile or atomic load.
5567   if (!LoadN->isSimple())
5568     return false;
5569 
5570   // Do not generate loads of non-round integer types since these can
5571   // be expensive (and would be wrong if the type is not byte sized).
5572   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5573     return false;
5574 
5575   if (LegalOperations &&
5576       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5577     return false;
5578 
5579   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5580     return false;
5581 
5582   return true;
5583 }
5584 
5585 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5586                                     ISD::LoadExtType ExtType, EVT &MemVT,
5587                                     unsigned ShAmt) {
5588   if (!LDST)
5589     return false;
5590   // Only allow byte offsets.
5591   if (ShAmt % 8)
5592     return false;
5593 
5594   // Do not generate loads of non-round integer types since these can
5595   // be expensive (and would be wrong if the type is not byte sized).
5596   if (!MemVT.isRound())
5597     return false;
5598 
5599   // Don't change the width of a volatile or atomic load.
5600   if (!LDST->isSimple())
5601     return false;
5602 
5603   EVT LdStMemVT = LDST->getMemoryVT();
5604 
5605   // Bail out when changing the scalable property, since we can't be sure that
5606   // we're actually narrowing here.
5607   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5608     return false;
5609 
5610   // Verify that we are actually reducing a load width here.
5611   if (LdStMemVT.bitsLT(MemVT))
5612     return false;
5613 
5614   // Ensure that this isn't going to produce an unsupported memory access.
5615   if (ShAmt) {
5616     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5617     const unsigned ByteShAmt = ShAmt / 8;
5618     const Align LDSTAlign = LDST->getAlign();
5619     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5620     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5621                                 LDST->getAddressSpace(), NarrowAlign,
5622                                 LDST->getMemOperand()->getFlags()))
5623       return false;
5624   }
5625 
5626   // It's not possible to generate a constant of extended or untyped type.
5627   EVT PtrType = LDST->getBasePtr().getValueType();
5628   if (PtrType == MVT::Untyped || PtrType.isExtended())
5629     return false;
5630 
5631   if (isa<LoadSDNode>(LDST)) {
5632     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5633     // Don't transform one with multiple uses; this would require adding a
5634     // new load.
5635     if (!SDValue(Load, 0).hasOneUse())
5636       return false;
5637 
5638     if (LegalOperations &&
5639         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5640       return false;
5641 
5642     // For the transform to be legal, the load must produce only two values
5643     // (the value loaded and the chain).  Don't transform a pre-increment
5644     // load, for example, which produces an extra value.  Otherwise the
5645     // transformation is not equivalent, and the downstream logic to replace
5646     // uses gets things wrong.
5647     if (Load->getNumValues() > 2)
5648       return false;
5649 
5650     // If the load that we're shrinking is an extload and we're not just
5651     // discarding the extension we can't simply shrink the load. Bail.
5652     // TODO: It would be possible to merge the extensions in some cases.
5653     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5654         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5655       return false;
5656 
5657     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5658       return false;
5659   } else {
5660     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5661     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5662     // Can't write outside the original store
5663     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5664       return false;
5665 
5666     if (LegalOperations &&
5667         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5668       return false;
5669   }
5670   return true;
5671 }
5672 
5673 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5674                                     SmallVectorImpl<LoadSDNode*> &Loads,
5675                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5676                                     ConstantSDNode *Mask,
5677                                     SDNode *&NodeToMask) {
5678   // Recursively search for the operands, looking for loads which can be
5679   // narrowed.
5680   for (SDValue Op : N->op_values()) {
5681     if (Op.getValueType().isVector())
5682       return false;
5683 
5684     // Some constants may need fixing up later if they are too large.
5685     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5686       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5687           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5688         NodesWithConsts.insert(N);
5689       continue;
5690     }
5691 
5692     if (!Op.hasOneUse())
5693       return false;
5694 
5695     switch(Op.getOpcode()) {
5696     case ISD::LOAD: {
5697       auto *Load = cast<LoadSDNode>(Op);
5698       EVT ExtVT;
5699       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5700           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5701 
5702         // ZEXTLOAD is already small enough.
5703         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5704             ExtVT.bitsGE(Load->getMemoryVT()))
5705           continue;
5706 
5707         // Use bitsLE so that equal-sized loads are converted to zext too.
5708         if (ExtVT.bitsLE(Load->getMemoryVT()))
5709           Loads.push_back(Load);
5710 
5711         continue;
5712       }
5713       return false;
5714     }
5715     case ISD::ZERO_EXTEND:
5716     case ISD::AssertZext: {
5717       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5718       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5719       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5720         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5721         Op.getOperand(0).getValueType();
5722 
5723       // We can accept extending nodes if the mask is wider or an equal
5724       // width to the original type.
5725       if (ExtVT.bitsGE(VT))
5726         continue;
5727       break;
5728     }
5729     case ISD::OR:
5730     case ISD::XOR:
5731     case ISD::AND:
5732       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5733                              NodeToMask))
5734         return false;
5735       continue;
5736     }
5737 
5738     // Allow one node which will be masked along with any loads found.
5739     if (NodeToMask)
5740       return false;
5741 
5742     // Also ensure that the node to be masked only produces one data result.
5743     NodeToMask = Op.getNode();
5744     if (NodeToMask->getNumValues() > 1) {
5745       bool HasValue = false;
5746       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5747         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5748         if (VT != MVT::Glue && VT != MVT::Other) {
5749           if (HasValue) {
5750             NodeToMask = nullptr;
5751             return false;
5752           }
5753           HasValue = true;
5754         }
5755       }
5756       assert(HasValue && "Node to be masked has no data result?");
5757     }
5758   }
5759   return true;
5760 }
5761 
5762 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5763   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5764   if (!Mask)
5765     return false;
5766 
5767   if (!Mask->getAPIntValue().isMask())
5768     return false;
5769 
5770   // No need to do anything if the and directly uses a load.
5771   if (isa<LoadSDNode>(N->getOperand(0)))
5772     return false;
5773 
5774   SmallVector<LoadSDNode*, 8> Loads;
5775   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5776   SDNode *FixupNode = nullptr;
5777   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5778     if (Loads.empty())
5779       return false;
5780 
5781     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5782     SDValue MaskOp = N->getOperand(1);
5783 
5784     // If it exists, fixup the single node we allow in the tree that needs
5785     // masking.
5786     if (FixupNode) {
5787       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5788       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5789                                 FixupNode->getValueType(0),
5790                                 SDValue(FixupNode, 0), MaskOp);
5791       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5792       if (And.getOpcode() == ISD::AND)
5793         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5794     }
5795 
5796     // Narrow any constants that need it.
5797     for (auto *LogicN : NodesWithConsts) {
5798       SDValue Op0 = LogicN->getOperand(0);
5799       SDValue Op1 = LogicN->getOperand(1);
5800 
5801       if (isa<ConstantSDNode>(Op0))
5802         std::swap(Op0, Op1);
5803 
5804       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5805                                 Op1, MaskOp);
5806 
5807       DAG.UpdateNodeOperands(LogicN, Op0, And);
5808     }
5809 
5810     // Create narrow loads.
5811     for (auto *Load : Loads) {
5812       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5813       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5814                                 SDValue(Load, 0), MaskOp);
5815       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5816       if (And.getOpcode() == ISD::AND)
5817         And = SDValue(
5818             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5819       SDValue NewLoad = reduceLoadWidth(And.getNode());
5820       assert(NewLoad &&
5821              "Shouldn't be masking the load if it can't be narrowed");
5822       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5823     }
5824     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5825     return true;
5826   }
5827   return false;
5828 }
5829 
5830 // Unfold
5831 //    x &  (-1 'logical shift' y)
5832 // To
5833 //    (x 'opposite logical shift' y) 'logical shift' y
5834 // if it is better for performance.
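// e.g. x & (-1 << y) clears the low y bits of x; (x >> y) << y computes the
// same value without materializing the variable mask.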
5835 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5836   assert(N->getOpcode() == ISD::AND);
5837 
5838   SDValue N0 = N->getOperand(0);
5839   SDValue N1 = N->getOperand(1);
5840 
5841   // Do we actually prefer shifts over mask?
5842   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5843     return SDValue();
5844 
5845   // Try to match  (-1 '[outer] logical shift' y)
5846   unsigned OuterShift;
5847   unsigned InnerShift; // The opposite direction to the OuterShift.
5848   SDValue Y;           // Shift amount.
5849   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5850     if (!M.hasOneUse())
5851       return false;
5852     OuterShift = M->getOpcode();
5853     if (OuterShift == ISD::SHL)
5854       InnerShift = ISD::SRL;
5855     else if (OuterShift == ISD::SRL)
5856       InnerShift = ISD::SHL;
5857     else
5858       return false;
5859     if (!isAllOnesConstant(M->getOperand(0)))
5860       return false;
5861     Y = M->getOperand(1);
5862     return true;
5863   };
5864 
5865   SDValue X;
5866   if (matchMask(N1))
5867     X = N0;
5868   else if (matchMask(N0))
5869     X = N1;
5870   else
5871     return SDValue();
5872 
5873   SDLoc DL(N);
5874   EVT VT = N->getValueType(0);
5875 
5876   //     tmp = x   'opposite logical shift' y
5877   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5878   //     ret = tmp 'logical shift' y
5879   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5880 
5881   return T1;
5882 }
5883 
5884 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5885 /// For a target with a bit test, this is expected to become test + set and save
5886 /// at least 1 instruction.
5887 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5888   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5889 
5890   // This is probably not worthwhile without a supported type.
5891   EVT VT = And->getValueType(0);
5892   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5893   if (!TLI.isTypeLegal(VT))
5894     return SDValue();
5895 
5896   // Look through an optional extension and find a 'not'.
5897   // TODO: Should we favor test+set even without the 'not' op?
5898   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5899   if (Not.getOpcode() == ISD::ANY_EXTEND)
5900     Not = Not.getOperand(0);
5901   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5902     return SDValue();
5903 
5904   // Look through an optional truncation. The source operand may not be the same
5905   // type as the original 'and', but that is ok because we are masking off
5906   // everything but the low bit.
5907   SDValue Srl = Not.getOperand(0);
5908   if (Srl.getOpcode() == ISD::TRUNCATE)
5909     Srl = Srl.getOperand(0);
5910 
5911   // Match a shift-right by constant.
5912   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5913       !isa<ConstantSDNode>(Srl.getOperand(1)))
5914     return SDValue();
5915 
5916   // We might have looked through casts that make this transform invalid.
5917   // TODO: If the source type is wider than the result type, do the mask and
5918   //       compare in the source type.
5919   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5920   unsigned VTBitWidth = VT.getSizeInBits();
5921   if (ShiftAmt.uge(VTBitWidth))
5922     return SDValue();
5923 
5924   // Turn this into a bit-test pattern using mask op + setcc:
5925   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
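  // e.g. with C = 3, the original expression is 1 exactly when bit 3 of X is
  // clear, which is what zext ((X & 8) == 0) computes.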
5926   SDLoc DL(And);
5927   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5928   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5929   SDValue Mask = DAG.getConstant(
5930       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5931   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5932   SDValue Zero = DAG.getConstant(0, DL, VT);
5933   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5934   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5935 }
5936 
5937 /// For targets that support usubsat, match a bit-hack form of that operation
5938 /// that ends in 'and' and convert it.
5939 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
5940   SDValue N0 = N->getOperand(0);
5941   SDValue N1 = N->getOperand(1);
5942   EVT VT = N1.getValueType();
5943 
5944   // Canonicalize SRA as operand 1.
5945   if (N0.getOpcode() == ISD::SRA)
5946     std::swap(N0, N1);
5947 
5948   // xor/add with SMIN (signmask) are logically equivalent.
5949   if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
5950     return SDValue();
5951 
5952   if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
5953       N0.getOperand(0) != N1.getOperand(0))
5954     return SDValue();
5955 
5956   unsigned BitWidth = VT.getScalarSizeInBits();
5957   ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
5958   ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
5959   if (!XorC || !XorC->getAPIntValue().isSignMask() ||
5960       !SraC || SraC->getAPIntValue() != BitWidth - 1)
5961     return SDValue();
5962 
5963   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
5964   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
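  // This is usubsat because (i8 X s>> 7) splats the sign bit: when X u>= 128
  // it is all-ones and the result is X ^ 128 == X - 128; otherwise it is zero.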
5965   SDLoc DL(N);
5966   SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
5967   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
5968 }
5969 
5970 /// Given a bitwise logic operation N with a matching bitwise logic operand,
5971 /// fold a pattern where 2 of the source operands are identically shifted
5972 /// values. For example:
5973 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
5974 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
5975                                  SelectionDAG &DAG) {
5976   unsigned LogicOpcode = N->getOpcode();
5977   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5978           LogicOpcode == ISD::XOR)
5979          && "Expected bitwise logic operation");
5980 
5981   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
5982     return SDValue();
5983 
5984   // Match another bitwise logic op and a shift.
5985   unsigned ShiftOpcode = ShiftOp.getOpcode();
5986   if (LogicOp.getOpcode() != LogicOpcode ||
5987       !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
5988         ShiftOpcode == ISD::SRA))
5989     return SDValue();
5990 
5991   // Match another shift op inside the first logic operand. Handle both commuted
5992   // possibilities.
5993   // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
5994   // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
5995   SDValue X1 = ShiftOp.getOperand(0);
5996   SDValue Y = ShiftOp.getOperand(1);
5997   SDValue X0, Z;
5998   if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
5999       LogicOp.getOperand(0).getOperand(1) == Y) {
6000     X0 = LogicOp.getOperand(0).getOperand(0);
6001     Z = LogicOp.getOperand(1);
6002   } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6003              LogicOp.getOperand(1).getOperand(1) == Y) {
6004     X0 = LogicOp.getOperand(1).getOperand(0);
6005     Z = LogicOp.getOperand(0);
6006   } else {
6007     return SDValue();
6008   }
6009 
6010   EVT VT = N->getValueType(0);
6011   SDLoc DL(N);
6012   SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6013   SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6014   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6015 }
6016 
6017 SDValue DAGCombiner::visitAND(SDNode *N) {
6018   SDValue N0 = N->getOperand(0);
6019   SDValue N1 = N->getOperand(1);
6020   EVT VT = N1.getValueType();
6021 
6022   // x & x --> x
6023   if (N0 == N1)
6024     return N0;
6025 
6026   // fold (and c1, c2) -> c1&c2
6027   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6028     return C;
6029 
6030   // canonicalize constant to RHS
6031   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6032       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6033     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6034 
6035   // fold vector ops
6036   if (VT.isVector()) {
6037     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6038       return FoldedVOp;
6039 
6040     // fold (and x, 0) -> 0, vector edition
6041     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6042       // Do not return N1, because an undef node may exist in N1.
6043       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6044                              SDLoc(N), N1.getValueType());
6045 
6046     // fold (and x, -1) -> x, vector edition
6047     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6048       return N0;
6049 
6050     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
6051     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6052     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
6053     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
6054         N0.hasOneUse() && N1.hasOneUse()) {
6055       EVT LoadVT = MLoad->getMemoryVT();
6056       EVT ExtVT = VT;
6057       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6058         // For this AND to be a zero extension of the masked load, the
6059         // elements of the BuildVec must mask the bottom bits of the
6060         // extended element type.
6061         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
6062           uint64_t ElementSize =
6063               LoadVT.getVectorElementType().getScalarSizeInBits();
6064           if (Splat->getAPIntValue().isMask(ElementSize)) {
6065             return DAG.getMaskedLoad(
6066                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6067                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6068                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6069                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6070           }
6071         }
6072       }
6073     }
6074   }
6075 
6076   // fold (and x, -1) -> x
6077   if (isAllOnesConstant(N1))
6078     return N0;
6079 
6080   // if (and x, c) is known to be zero, return 0
6081   unsigned BitWidth = VT.getScalarSizeInBits();
6082   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6083   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6084     return DAG.getConstant(0, SDLoc(N), VT);
6085 
6086   if (SDValue NewSel = foldBinOpIntoSelect(N))
6087     return NewSel;
6088 
6089   // reassociate and
6090   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
6091     return RAND;
6092 
6093   // Try to convert a constant mask AND into a shuffle clear mask.
6094   if (VT.isVector())
6095     if (SDValue Shuffle = XformToShuffleWithZero(N))
6096       return Shuffle;
6097 
6098   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6099     return Combined;
6100 
6101   // fold (and (or x, C), D) -> D if (C & D) == D
6102   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6103     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6104   };
6105   if (N0.getOpcode() == ISD::OR &&
6106       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6107     return N1;
6108   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
6109   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6110     SDValue N0Op0 = N0.getOperand(0);
6111     APInt Mask = ~N1C->getAPIntValue();
6112     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
6113     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
6114       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
6115                                  N0.getValueType(), N0Op0);
6116 
6117       // Replace uses of the AND with uses of the Zero extend node.
6118       CombineTo(N, Zext);
6119 
6120       // We actually want to replace all uses of the any_extend with the
6121       // zero_extend, to avoid duplicating things.  This will later cause this
6122       // AND to be folded.
6123       CombineTo(N0.getNode(), Zext);
6124       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6125     }
6126   }
6127 
6128   // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
6129   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
6130   // already be zero by virtue of the width of the base type of the load.
6131   //
6132   // The 'X' node here can either be nothing or an extract_vector_elt to catch
6133   // more cases.
6134   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6135        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6136        N0.getOperand(0).getOpcode() == ISD::LOAD &&
6137        N0.getOperand(0).getResNo() == 0) ||
6138       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6139     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6140                                          N0 : N0.getOperand(0) );
6141 
6142     // Get the constant (if applicable) the zero'th operand is being ANDed with.
6143     // This can be a pure constant or a vector splat, in which case we treat the
6144     // vector as a scalar and use the splat value.
6145     APInt Constant = APInt::getZero(1);
6146     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
6147       Constant = C->getAPIntValue();
6148     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6149       APInt SplatValue, SplatUndef;
6150       unsigned SplatBitSize;
6151       bool HasAnyUndefs;
6152       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6153                                              SplatBitSize, HasAnyUndefs);
6154       if (IsSplat) {
6155         // Undef bits can contribute to a possible optimisation if set, so
6156         // set them.
6157         SplatValue |= SplatUndef;
6158 
6159         // The splat value may be something like "0x00FFFFFF", which means 0 for
6160         // the first vector value and FF for the rest, repeating. We need a mask
6161         // that will apply equally to all members of the vector, so AND all the
6162         // lanes of the constant together.
6163         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6164 
6165         // If the splat value has been compressed to a bitlength lower
6166         // than the size of the vector lane, we need to re-expand it to
6167         // the lane size.
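        // e.g. SplatValue = 0xAB with SplatBitSize = 8 and EltBitWidth = 32
        // re-expands to 0xABABABAB.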
6168         if (EltBitWidth > SplatBitSize)
6169           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6170                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6171             SplatValue |= SplatValue.shl(SplatBitSize);
6172 
6173         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is
6174         // a multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
6175         if ((SplatBitSize % EltBitWidth) == 0) {
6176           Constant = APInt::getAllOnes(EltBitWidth);
6177           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6178             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6179         }
6180       }
6181     }
6182 
6183     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6184     // actually legal and isn't going to get expanded, else this is a false
6185     // optimisation.
6186     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6187                                                     Load->getValueType(0),
6188                                                     Load->getMemoryVT());
6189 
6190     // Resize the constant to the same size as the original memory access before
6191     // extension. If it is still the AllOnesValue then this AND is completely
6192     // unneeded.
6193     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6194 
6195     bool B;
6196     switch (Load->getExtensionType()) {
6197     default: B = false; break;
6198     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6199     case ISD::ZEXTLOAD:
6200     case ISD::NON_EXTLOAD: B = true; break;
6201     }
6202 
6203     if (B && Constant.isAllOnes()) {
6204       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6205       // preserve semantics once we get rid of the AND.
6206       SDValue NewLoad(Load, 0);
6207 
6208       // Fold the AND away. NewLoad may get replaced immediately.
6209       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6210 
6211       if (Load->getExtensionType() == ISD::EXTLOAD) {
6212         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6213                               Load->getValueType(0), SDLoc(Load),
6214                               Load->getChain(), Load->getBasePtr(),
6215                               Load->getOffset(), Load->getMemoryVT(),
6216                               Load->getMemOperand());
6217         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6218         if (Load->getNumValues() == 3) {
6219           // PRE/POST_INC loads have 3 values.
6220           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6221                            NewLoad.getValue(2) };
6222           CombineTo(Load, To, 3, true);
6223         } else {
6224           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6225         }
6226       }
6227 
6228       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6229     }
6230   }
6231 
6232   // fold (and (masked_gather x)) -> (zext_masked_gather x)
6233   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6234     EVT MemVT = GN0->getMemoryVT();
6235     EVT ScalarVT = MemVT.getScalarType();
6236 
6237     if (SDValue(GN0, 0).hasOneUse() &&
6238         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
6239         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
6240       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
6241                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
6242 
6243       SDValue ZExtLoad = DAG.getMaskedGather(
6244           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6245           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6246 
6247       CombineTo(N, ZExtLoad);
6248       AddToWorklist(ZExtLoad.getNode());
6249       // Avoid recheck of N.
6250       return SDValue(N, 0);
6251     }
6252   }
6253 
6254   // fold (and (load x), 255) -> (zextload x, i8)
6255   // fold (and (extload x, i16), 255) -> (zextload x, i8)
6256   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6257   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6258                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
6259                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6260     if (SDValue Res = reduceLoadWidth(N)) {
6261       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6262         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6263       AddToWorklist(N);
6264       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6265       return SDValue(N, 0);
6266     }
6267   }
6268 
6269   if (LegalTypes) {
6270     // Attempt to propagate the AND back up to the leaves which, if they're
6271     // loads, can be combined to narrow loads and the AND node can be removed.
6272     // Perform after legalization so that extend nodes will already be
6273     // combined into the loads.
6274     if (BackwardsPropagateMask(N))
6275       return SDValue(N, 0);
6276   }
6277 
6278   if (SDValue Combined = visitANDLike(N0, N1, N))
6279     return Combined;
6280 
6281   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
6282   if (N0.getOpcode() == N1.getOpcode())
6283     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6284       return V;
6285 
6286   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6287     return R;
6288   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
6289     return R;
6290 
6291   // Masking the negated extension of a boolean is just the zero-extended
6292   // boolean:
6293   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6294   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
6295   //
6296   // Note: the SimplifyDemandedBits fold below can make an information-losing
6297   // transform, and then we have no way to find this better fold.
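  // e.g. when bool X is true: sub 0, zext(X) = -1 and (-1 & 1) = 1 == zext(X);
  // when X is false both sides are 0.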
6298   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6299     if (isNullOrNullSplat(N0.getOperand(0))) {
6300       SDValue SubRHS = N0.getOperand(1);
6301       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6302           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6303         return SubRHS;
6304       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6305           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6306         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6307     }
6308   }
6309 
6310   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6311   // fold (and (sra)) -> (and (srl)) when possible.
6312   if (SimplifyDemandedBits(SDValue(N, 0)))
6313     return SDValue(N, 0);
6314 
6315   // fold (zext_inreg (extload x)) -> (zextload x)
6316   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6317   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6318       (ISD::isEXTLoad(N0.getNode()) ||
6319        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6320     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6321     EVT MemVT = LN0->getMemoryVT();
6322     // If we zero all the possible extended bits, then we can turn this into
6323     // a zextload if we are running before legalize or the operation is legal.
6324     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6325     unsigned MemBitSize = MemVT.getScalarSizeInBits();
6326     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6327     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6328         ((!LegalOperations && LN0->isSimple()) ||
6329          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6330       SDValue ExtLoad =
6331           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6332                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6333       AddToWorklist(N);
6334       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6335       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6336     }
6337   }
6338 
6339   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6340   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6341     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6342                                            N0.getOperand(1), false))
6343       return BSwap;
6344   }
6345 
6346   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6347     return Shifts;
6348 
6349   if (TLI.hasBitTest(N0, N1))
6350     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6351       return V;
6352 
6353   // Recognize the following pattern:
6354   //
6355   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
6356   //
6357   // where bitmask keeps exactly as many low bits as NarrowVT is wide,
6358   // clearing all of the sign-extended upper bits of AndVT.
6359   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6360     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6361       return false;
6362 
6363     auto *C = dyn_cast<ConstantSDNode>(RHS);
6364     if (!C)
6365       return false;
6366 
6367     if (!C->getAPIntValue().isMask(
6368             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6369       return false;
6370 
6371     return true;
6372   };
6373 
6374   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6375   if (IsAndZeroExtMask(N0, N1))
6376     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6377 
6378   if (hasOperation(ISD::USUBSAT, VT))
6379     if (SDValue V = foldAndToUsubsat(N, DAG))
6380       return V;
6381 
6382   return SDValue();
6383 }
6384 
6385 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
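/// e.g. for i32 a = 0x11223344, ((a & 0xff) << 8) | ((a >> 8) & 0xff) is
/// 0x4433, which equals (bswap a) >> 16 = 0x44332211 >> 16.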
6386 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6387                                         bool DemandHighBits) {
6388   if (!LegalOperations)
6389     return SDValue();
6390 
6391   EVT VT = N->getValueType(0);
6392   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6393     return SDValue();
6394   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6395     return SDValue();
6396 
6397   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6398   bool LookPassAnd0 = false;
6399   bool LookPassAnd1 = false;
6400   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6401     std::swap(N0, N1);
6402   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6403     std::swap(N0, N1);
6404   if (N0.getOpcode() == ISD::AND) {
6405     if (!N0->hasOneUse())
6406       return SDValue();
6407     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6408     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6409     // This is needed for X86.
6410     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6411                   N01C->getZExtValue() != 0xFFFF))
6412       return SDValue();
6413     N0 = N0.getOperand(0);
6414     LookPassAnd0 = true;
6415   }
6416 
6417   if (N1.getOpcode() == ISD::AND) {
6418     if (!N1->hasOneUse())
6419       return SDValue();
6420     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6421     if (!N11C || N11C->getZExtValue() != 0xFF)
6422       return SDValue();
6423     N1 = N1.getOperand(0);
6424     LookPassAnd1 = true;
6425   }
6426 
6427   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6428     std::swap(N0, N1);
6429   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6430     return SDValue();
6431   if (!N0->hasOneUse() || !N1->hasOneUse())
6432     return SDValue();
6433 
6434   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6435   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6436   if (!N01C || !N11C)
6437     return SDValue();
6438   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6439     return SDValue();
6440 
6441   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6442   SDValue N00 = N0->getOperand(0);
6443   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6444     if (!N00->hasOneUse())
6445       return SDValue();
6446     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6447     if (!N001C || N001C->getZExtValue() != 0xFF)
6448       return SDValue();
6449     N00 = N00.getOperand(0);
6450     LookPassAnd0 = true;
6451   }
6452 
6453   SDValue N10 = N1->getOperand(0);
6454   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6455     if (!N10->hasOneUse())
6456       return SDValue();
6457     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6458     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6459     // for X86.
6460     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6461                    N101C->getZExtValue() != 0xFFFF))
6462       return SDValue();
6463     N10 = N10.getOperand(0);
6464     LookPassAnd1 = true;
6465   }
6466 
6467   if (N00 != N10)
6468     return SDValue();
6469 
6470   // Make sure everything beyond the low halfword gets set to zero since the SRL
6471   // 16 will clear the top bits.
6472   unsigned OpSizeInBits = VT.getSizeInBits();
6473   if (DemandHighBits && OpSizeInBits > 16) {
6474     // If the left-shift isn't masked out then the only way this is a bswap is
6475     // if all bits beyond the low 8 are 0. In that case the entire pattern
6476     // reduces to a left shift anyway: leave it for other parts of the combiner.
6477     if (!LookPassAnd0)
6478       return SDValue();
6479 
6480     // However, if the right shift isn't masked out then it might be because
6481     // it's not needed. See if we can spot that too.
6482     if (!LookPassAnd1 &&
6483         !DAG.MaskedValueIsZero(
6484             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6485       return SDValue();
6486   }
6487 
6488   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6489   if (OpSizeInBits > 16) {
6490     SDLoc DL(N);
6491     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6492                       DAG.getConstant(OpSizeInBits - 16, DL,
6493                                       getShiftAmountTy(VT)));
6494   }
6495   return Res;
6496 }
6497 
6498 /// Return true if the specified node is an element that makes up a 32-bit
6499 /// packed halfword byteswap.
6500 /// ((x & 0x000000ff) << 8) |
6501 /// ((x & 0x0000ff00) >> 8) |
6502 /// ((x & 0x00ff0000) << 8) |
6503 /// ((x & 0xff000000) >> 8)
6504 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6505   if (!N->hasOneUse())
6506     return false;
6507 
6508   unsigned Opc = N.getOpcode();
6509   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6510     return false;
6511 
6512   SDValue N0 = N.getOperand(0);
6513   unsigned Opc0 = N0.getOpcode();
6514   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6515     return false;
6516 
6517   ConstantSDNode *N1C = nullptr;
6518   // SHL or SRL: look upstream for AND mask operand
6519   if (Opc == ISD::AND)
6520     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6521   else if (Opc0 == ISD::AND)
6522     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6523   if (!N1C)
6524     return false;
6525 
6526   unsigned MaskByteOffset;
6527   switch (N1C->getZExtValue()) {
6528   default:
6529     return false;
6530   case 0xFF:       MaskByteOffset = 0; break;
6531   case 0xFF00:     MaskByteOffset = 1; break;
6532   case 0xFFFF:
6533     // In case demanded bits didn't clear the bits that will be shifted out.
6534     // This is needed for X86.
6535     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6536       MaskByteOffset = 1;
6537       break;
6538     }
6539     return false;
6540   case 0xFF0000:   MaskByteOffset = 2; break;
6541   case 0xFF000000: MaskByteOffset = 3; break;
6542   }
6543 
6544   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6545   if (Opc == ISD::AND) {
6546     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6547       // (x >> 8) & 0xff
6548       // (x >> 8) & 0xff0000
6549       if (Opc0 != ISD::SRL)
6550         return false;
6551       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6552       if (!C || C->getZExtValue() != 8)
6553         return false;
6554     } else {
6555       // (x << 8) & 0xff00
6556       // (x << 8) & 0xff000000
6557       if (Opc0 != ISD::SHL)
6558         return false;
6559       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6560       if (!C || C->getZExtValue() != 8)
6561         return false;
6562     }
6563   } else if (Opc == ISD::SHL) {
6564     // (x & 0xff) << 8
6565     // (x & 0xff0000) << 8
6566     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6567       return false;
6568     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6569     if (!C || C->getZExtValue() != 8)
6570       return false;
6571   } else { // Opc == ISD::SRL
6572     // (x & 0xff00) >> 8
6573     // (x & 0xff000000) >> 8
6574     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6575       return false;
6576     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6577     if (!C || C->getZExtValue() != 8)
6578       return false;
6579   }
6580 
6581   if (Parts[MaskByteOffset])
6582     return false;
6583 
6584   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6585   return true;
6586 }
6587 
6588 // Match 2 elements of a packed halfword bswap.
6589 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6590   if (N.getOpcode() == ISD::OR)
6591     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6592            isBSwapHWordElement(N.getOperand(1), Parts);
6593 
6594   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6595     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6596     if (!C || C->getAPIntValue() != 16)
6597       return false;
6598     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6599     return true;
6600   }
6601 
6602   return false;
6603 }
6604 
6605 // Match this pattern:
6606 //   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6607 // And rewrite this to:
6608 //   (rotr (bswap A), 16)
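// e.g. for A = 0x11223344: ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff)
// = 0x22004400 | 0x00110033 = 0x22114433, and (rotr (bswap A), 16)
// = rotr (0x44332211, 16) = 0x22114433.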
6609 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6610                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6611                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6612   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6613          "MatchBSwapHWordOrAndAnd: expecting i32");
6614   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6615     return SDValue();
6616   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6617     return SDValue();
6618   // TODO: this is too restrictive; lifting this restriction requires more tests
6619   if (!N0->hasOneUse() || !N1->hasOneUse())
6620     return SDValue();
6621   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6622   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6623   if (!Mask0 || !Mask1)
6624     return SDValue();
6625   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6626       Mask1->getAPIntValue() != 0x00ff00ff)
6627     return SDValue();
6628   SDValue Shift0 = N0.getOperand(0);
6629   SDValue Shift1 = N1.getOperand(0);
6630   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6631     return SDValue();
6632   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6633   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6634   if (!ShiftAmt0 || !ShiftAmt1)
6635     return SDValue();
6636   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6637     return SDValue();
6638   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6639     return SDValue();
6640 
6641   SDLoc DL(N);
6642   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6643   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6644   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6645 }
6646 
6647 /// Match a 32-bit packed halfword bswap. That is
6648 /// ((x & 0x000000ff) << 8) |
6649 /// ((x & 0x0000ff00) >> 8) |
6650 /// ((x & 0x00ff0000) << 8) |
6651 /// ((x & 0xff000000) >> 8)
6652 /// => (rotl (bswap x), 16)
6653 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6654   if (!LegalOperations)
6655     return SDValue();
6656 
6657   EVT VT = N->getValueType(0);
6658   if (VT != MVT::i32)
6659     return SDValue();
6660   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6661     return SDValue();
6662 
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;
6666 
6667   // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

6673   // Look for either
6674   // (or (bswaphpair), (bswaphpair))
6675   // (or (or (bswaphpair), (and)), (and))
6676   // (or (or (and), (bswaphpair)), (and))
6677   SDNode *Parts[4] = {};
6678 
6679   if (isBSwapHWordPair(N0, Parts)) {
6680     // (or (or (and), (and)), (or (and), (and)))
6681     if (!isBSwapHWordPair(N1, Parts))
6682       return SDValue();
6683   } else if (N0.getOpcode() == ISD::OR) {
6684     // (or (or (or (and), (and)), (and)), (and))
6685     if (!isBSwapHWordElement(N1, Parts))
6686       return SDValue();
6687     SDValue N00 = N0.getOperand(0);
6688     SDValue N01 = N0.getOperand(1);
6689     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6690         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6691       return SDValue();
6692   } else
6693     return SDValue();
6694 
6695   // Make sure the parts are all coming from the same node.
6696   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6697     return SDValue();
6698 
6699   SDLoc DL(N);
6700   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6701                               SDValue(Parts[0], 0));
6702 
  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
6705   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6706   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6707     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6708   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6709     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6710   return DAG.getNode(ISD::OR, DL, VT,
6711                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6712                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6713 }
6714 
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value; \see visitANDLike().
6717 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6718   EVT VT = N1.getValueType();
6719   SDLoc DL(N);
6720 
6721   // fold (or x, undef) -> -1
6722   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6723     return DAG.getAllOnesConstant(DL, VT);
6724 
6725   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6726     return V;
6727 
6728   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
6729   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6730       // Don't increase # computations.
6731       (N0->hasOneUse() || N1->hasOneUse())) {
6732     // We can only do this xform if we know that bits from X that are set in C2
6733     // but not in C1 are already zero.  Likewise for Y.
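    // An illustrative instance (constants chosen here): with C1 == 0x00ff and
    // C2 == 0xff00, the fold fires only if X's high byte and Y's low byte are
    // known zero, and then yields (and (or X, Y), 0xffff).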
6734     if (const ConstantSDNode *N0O1C =
6735         getAsNonOpaqueConstant(N0.getOperand(1))) {
6736       if (const ConstantSDNode *N1O1C =
6737           getAsNonOpaqueConstant(N1.getOperand(1))) {
6738         // We can only do this xform if we know that bits from X that are set in
6739         // C2 but not in C1 are already zero.  Likewise for Y.
6740         const APInt &LHSMask = N0O1C->getAPIntValue();
6741         const APInt &RHSMask = N1O1C->getAPIntValue();
6742 
6743         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6744             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6745           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6746                                   N0.getOperand(0), N1.getOperand(0));
6747           return DAG.getNode(ISD::AND, DL, VT, X,
6748                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6749         }
6750       }
6751     }
6752   }
6753 
6754   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6755   if (N0.getOpcode() == ISD::AND &&
6756       N1.getOpcode() == ISD::AND &&
6757       N0.getOperand(0) == N1.getOperand(0) &&
6758       // Don't increase # computations.
6759       (N0->hasOneUse() || N1->hasOneUse())) {
6760     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6761                             N0.getOperand(1), N1.getOperand(1));
6762     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6763   }
6764 
6765   return SDValue();
6766 }
6767 
6768 /// OR combines for which the commuted variant will be tried as well.
6769 static SDValue visitORCommutative(
6770     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6771   EVT VT = N0.getValueType();
6772   if (N0.getOpcode() == ISD::AND) {
6773     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6774     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6775       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6776 
6777     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6778     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6779       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6780   }
6781 
6782   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6783     return R;
6784 
6785   auto peekThroughZext = [](SDValue V) {
6786     if (V->getOpcode() == ISD::ZERO_EXTEND)
6787       return V->getOperand(0);
6788     return V;
6789   };
6790 
6791   // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
6792   if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
6793       N0.getOperand(0) == N1.getOperand(0) &&
6794       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
6795     return N0;
6796 
6797   // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
6798   if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
6799       N0.getOperand(1) == N1.getOperand(0) &&
6800       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
6801     return N0;
6802 
6803   return SDValue();
6804 }
6805 
6806 SDValue DAGCombiner::visitOR(SDNode *N) {
6807   SDValue N0 = N->getOperand(0);
6808   SDValue N1 = N->getOperand(1);
6809   EVT VT = N1.getValueType();
6810 
6811   // x | x --> x
6812   if (N0 == N1)
6813     return N0;
6814 
6815   // fold (or c1, c2) -> c1|c2
6816   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6817     return C;
6818 
6819   // canonicalize constant to RHS
6820   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6821       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6822     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6823 
6824   // fold vector ops
6825   if (VT.isVector()) {
6826     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6827       return FoldedVOp;
6828 
6829     // fold (or x, 0) -> x, vector edition
6830     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6831       return N0;
6832 
6833     // fold (or x, -1) -> -1, vector edition
6834     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      // Do not return N1, because an undef node may exist in N1.
6836       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6837 
6838     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6839     // Do this only if the resulting shuffle is legal.
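    // Illustrative v4i32 example (made-up values): with
    //   N0 = shuffle(A, zero, <0,5,2,7>) == <A0, 0, A2, 0> and
    //   N1 = shuffle(B, zero, <4,1,6,3>) == <0, B1, 0, B3>,
    // the OR selects <A0, B1, A2, B3>, i.e. shuffle(A, B, <0,5,2,7>).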
6840     if (isa<ShuffleVectorSDNode>(N0) &&
6841         isa<ShuffleVectorSDNode>(N1) &&
6842         // Avoid folding a node with illegal type.
6843         TLI.isTypeLegal(VT)) {
6844       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6845       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6846       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6847       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6848       // Ensure both shuffles have a zero input.
6849       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6850         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6851         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6852         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6853         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6854         bool CanFold = true;
6855         int NumElts = VT.getVectorNumElements();
6856         SmallVector<int, 4> Mask(NumElts);
6857 
6858         for (int i = 0; i != NumElts; ++i) {
6859           int M0 = SV0->getMaskElt(i);
6860           int M1 = SV1->getMaskElt(i);
6861 
6862           // Determine if either index is pointing to a zero vector.
6863           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6864           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6865 
          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case where both are undef.
6868           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6869             Mask[i] = -1;
6870             continue;
6871           }
6872 
6873           // Make sure only one of the elements is zero.
6874           if (M0Zero == M1Zero) {
6875             CanFold = false;
6876             break;
6877           }
6878 
6879           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6880 
6881           // We have a zero and non-zero element. If the non-zero came from
6882           // SV0 make the index a LHS index. If it came from SV1, make it
6883           // a RHS index. We need to mod by NumElts because we don't care
6884           // which operand it came from in the original shuffles.
6885           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6886         }
6887 
6888         if (CanFold) {
6889           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6890           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6891 
6892           SDValue LegalShuffle =
6893               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6894                                           Mask, DAG);
6895           if (LegalShuffle)
6896             return LegalShuffle;
6897         }
6898       }
6899     }
6900   }
6901 
6902   // fold (or x, 0) -> x
6903   if (isNullConstant(N1))
6904     return N0;
6905 
6906   // fold (or x, -1) -> -1
6907   if (isAllOnesConstant(N1))
6908     return N1;
6909 
6910   if (SDValue NewSel = foldBinOpIntoSelect(N))
6911     return NewSel;
6912 
6913   // fold (or x, c) -> c iff (x & ~c) == 0
6914   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6915   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6916     return N1;
6917 
6918   if (SDValue Combined = visitORLike(N0, N1, N))
6919     return Combined;
6920 
6921   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6922     return Combined;
6923 
6924   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6925   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6926     return BSwap;
6927   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6928     return BSwap;
6929 
6930   // reassociate or
6931   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6932     return ROR;
6933 
6934   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6935   // iff (c1 & c2) != 0 or c1/c2 are undef.
6936   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6937     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6938   };
6939   if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
6940       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6941     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6942                                                  {N1, N0.getOperand(1)})) {
6943       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6944       AddToWorklist(IOR.getNode());
6945       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6946     }
6947   }
6948 
6949   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6950     return Combined;
6951   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6952     return Combined;
6953 
6954   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6955   if (N0.getOpcode() == N1.getOpcode())
6956     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6957       return V;
6958 
6959   // See if this is some rotate idiom.
6960   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6961     return Rot;
6962 
6963   if (SDValue Load = MatchLoadCombine(N))
6964     return Load;
6965 
6966   // Simplify the operands using demanded-bits information.
6967   if (SimplifyDemandedBits(SDValue(N, 0)))
6968     return SDValue(N, 0);
6969 
6970   // If OR can be rewritten into ADD, try combines based on ADD.
6971   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6972       DAG.haveNoCommonBitsSet(N0, N1))
6973     if (SDValue Combined = visitADDLike(N))
6974       return Combined;
6975 
6976   return SDValue();
6977 }
6978 
6979 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6980   if (Op.getOpcode() == ISD::AND &&
6981       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6982     Mask = Op.getOperand(1);
6983     return Op.getOperand(0);
6984   }
6985   return Op;
6986 }
6987 
6988 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6989 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6990                             SDValue &Mask) {
6991   Op = stripConstantMask(DAG, Op, Mask);
6992   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6993     Shift = Op;
6994     return true;
6995   }
6996   return false;
6997 }
6998 
6999 /// Helper function for visitOR to extract the needed side of a rotate idiom
7000 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
7001 /// InstCombine merged some outside op with one of the shifts from
7002 /// the rotate pattern.
7003 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
7004 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
7005 /// patterns:
7006 ///
///   (or (add v v) (srl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (srl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (srl (udiv v c1) c3)
///
///   (or (shl v c0) (srl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (srl v c0) (shl (srl v c1) c2)):
///     expands (srl v c0) -> (srl (srl v c1) c3)
7021 ///
7022 /// Such that in all cases, c3+c2==bitwidth(op v c1).
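///
/// A concrete illustration of the mul case for i32: in
///   (or (mul v 16) (srl (mul v 2) 29))
/// we have c0 == 16, c1 == 2, c2 == 29, so c3 == 32 - 29 == 3, and since
/// 2 << 3 == 16 the (mul v 16) side is expanded to (shl (mul v 2) 3),
/// exposing a rotate of (mul v 2).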
7023 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
7024                                      SDValue ExtractFrom, SDValue &Mask,
7025                                      const SDLoc &DL) {
7026   assert(OppShift && ExtractFrom && "Empty SDValue");
7027   assert(
7028       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
7029       "Existing shift must be valid as a rotate half");
7030 
7031   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
7032 
7033   // Value and Type of the shift.
7034   SDValue OppShiftLHS = OppShift.getOperand(0);
7035   EVT ShiftedVT = OppShiftLHS.getValueType();
7036 
7037   // Amount of the existing shift.
7038   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
7039 
7040   // (add v v) -> (shl v 1)
7041   // TODO: Should this be a general DAG canonicalization?
7042   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
7043       ExtractFrom.getOpcode() == ISD::ADD &&
7044       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
7045       ExtractFrom.getOperand(0) == OppShiftLHS &&
7046       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
7047     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
7048                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
7049 
7050   // Preconditions:
7051   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
7052   //
7053   // Find opcode of the needed shift to be extracted from (op0 v c0).
7054   unsigned Opcode = ISD::DELETED_NODE;
7055   bool IsMulOrDiv = false;
7056   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
7057   // opcode or its arithmetic (mul or udiv) variant.
7058   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
7059     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
7060     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
7061       return false;
7062     Opcode = NeededShift;
7063     return true;
7064   };
7065   // op0 must be either the needed shift opcode or the mul/udiv equivalent
7066   // that the needed shift can be extracted from.
7067   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
7068       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
7069     return SDValue();
7070 
7071   // op0 must be the same opcode on both sides, have the same LHS argument,
7072   // and produce the same value type.
7073   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
7074       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
7075       ShiftedVT != ExtractFrom.getValueType())
7076     return SDValue();
7077 
7078   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
7079   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
7080   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
7081   ConstantSDNode *ExtractFromCst =
7082       isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
7085   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
7086       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
7087       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
7088     return SDValue();
7089 
7090   // Compute the shift amount we need to extract to complete the rotate.
7091   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
7092   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
7093     return SDValue();
7094   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
7095   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
7096   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
7097   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
7098   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
7099 
  // Now try to extract the needed shift from the ExtractFrom op and see if
  // the result matches up with the existing shift's LHS op.
7102   if (IsMulOrDiv) {
7103     // Op to extract from is a mul or udiv by a constant.
7104     // Check:
7105     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
7106     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
7107     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
7108                                                  NeededShiftAmt.getZExtValue());
7109     APInt ResultAmt;
7110     APInt Rem;
7111     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
7112     if (Rem != 0 || ResultAmt != OppLHSAmt)
7113       return SDValue();
7114   } else {
7115     // Op to extract from is a shift by a constant.
7116     // Check:
7117     //      c2 - (bitwidth(op0 v c0) - c1) == c0
7118     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
7119                                           ExtractFromAmt.getBitWidth()))
7120       return SDValue();
7121   }
7122 
7123   // Return the expanded shift op that should allow a rotate to be formed.
7124   EVT ShiftVT = OppShift.getOperand(1).getValueType();
7125   EVT ResVT = ExtractFrom.getValueType();
7126   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
7127   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
7128 }
7129 
7130 // Return true if we can prove that, whenever Neg and Pos are both in the
7131 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
7132 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
7133 //
7134 //     (or (shift1 X, Neg), (shift2 X, Pos))
7135 //
7136 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
7137 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
7138 // to consider shift amounts with defined behavior.
7139 //
7140 // The IsRotate flag should be set when the LHS of both shifts is the same.
7141 // Otherwise if matching a general funnel shift, it should be clear.
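// For example (EltSize == 32, an illustrative case): Pos == y with
// Neg == (and (sub 0, y), 31) satisfies this, because (-y) & 31 ==
// (32 - y) & 31, which is 32 - y for y in [1, 31] and 0 for y == 0.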
7142 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
7143                            SelectionDAG &DAG, bool IsRotate) {
7144   // If EltSize is a power of 2 then:
7145   //
7146   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
7147   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
7148   //
7149   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
7150   // for the stronger condition:
7151   //
7152   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
7153   //
7154   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
7155   // we can just replace Neg with Neg' for the rest of the function.
7156   //
7157   // In other cases we check for the even stronger condition:
7158   //
7159   //     Neg == EltSize - Pos                                    [B]
7160   //
7161   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
7162   // behavior if Pos == 0 (and consequently Neg == EltSize).
7163   //
7164   // We could actually use [A] whenever EltSize is a power of 2, but the
7165   // only extra cases that it would match are those uninteresting ones
7166   // where Neg and Pos are never in range at the same time.  E.g. for
7167   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
7168   // as well as (sub 32, Pos), but:
7169   //
7170   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
7171   //
7172   // always invokes undefined behavior for 32-bit X.
7173   //
7174   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
7175   //
7176   // NOTE: We can only do this when matching an AND and not a general
7177   // funnel shift.
7178   unsigned MaskLoBits = 0;
7179   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
7180     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
7181       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
7182       unsigned Bits = Log2_64(EltSize);
7183       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
7184           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
7185         Neg = Neg.getOperand(0);
7186         MaskLoBits = Bits;
7187       }
7188     }
7189   }
7190 
  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and
  // NegOp1.
7192   if (Neg.getOpcode() != ISD::SUB)
7193     return false;
7194   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
7195   if (!NegC)
7196     return false;
7197   SDValue NegOp1 = Neg.getOperand(1);
7198 
7199   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
7200   // Pos'.  The truncation is redundant for the purpose of the equality.
7201   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
7202     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
7203       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
7204       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
7205           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
7206            MaskLoBits))
7207         Pos = Pos.getOperand(0);
7208     }
7209   }
7210 
7211   // The condition we need is now:
7212   //
7213   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
7214   //
7215   // If NegOp1 == Pos then we need:
7216   //
7217   //              EltSize & Mask == NegC & Mask
7218   //
7219   // (because "x & Mask" is a truncation and distributes through subtraction).
7220   //
7221   // We also need to account for a potential truncation of NegOp1 if the amount
7222   // has already been legalized to a shift amount type.
7223   APInt Width;
7224   if ((Pos == NegOp1) ||
7225       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
7226     Width = NegC->getAPIntValue();
7227 
7228   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
7229   // Then the condition we want to prove becomes:
7230   //
7231   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
7232   //
7233   // which, again because "x & Mask" is a truncation, becomes:
7234   //
7235   //                NegC & Mask == (EltSize - PosC) & Mask
7236   //             EltSize & Mask == (NegC + PosC) & Mask
7237   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
7238     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
7239       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
7240     else
7241       return false;
7242   } else
7243     return false;
7244 
7245   // Now we just need to check that EltSize & Mask == Width & Mask.
7246   if (MaskLoBits)
7247     // EltSize & Mask is 0 since Mask is EltSize - 1.
7248     return Width.getLoBits(MaskLoBits) == 0;
7249   return Width == EltSize;
7250 }
7251 
7252 // A subroutine of MatchRotate used once we have found an OR of two opposite
7253 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
7254 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
7255 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
7256 // Neg with outer conversions stripped away.
7257 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
7258                                        SDValue Neg, SDValue InnerPos,
7259                                        SDValue InnerNeg, bool HasPos,
7260                                        unsigned PosOpcode, unsigned NegOpcode,
7261                                        const SDLoc &DL) {
7262   // fold (or (shl x, (*ext y)),
7263   //          (srl x, (*ext (sub 32, y)))) ->
7264   //   (rotl x, y) or (rotr x, (sub 32, y))
7265   //
7266   // fold (or (shl x, (*ext (sub 32, y))),
7267   //          (srl x, (*ext y))) ->
7268   //   (rotr x, y) or (rotl x, (sub 32, y))
7269   EVT VT = Shifted.getValueType();
7270   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
7271                      /*IsRotate*/ true)) {
7272     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
7273                        HasPos ? Pos : Neg);
7274   }
7275 
7276   return SDValue();
7277 }
7278 
7279 // A subroutine of MatchRotate used once we have found an OR of two opposite
7280 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
7281 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
7282 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
7283 // Neg with outer conversions stripped away.
7284 // TODO: Merge with MatchRotatePosNeg.
7285 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
7286                                        SDValue Neg, SDValue InnerPos,
7287                                        SDValue InnerNeg, bool HasPos,
7288                                        unsigned PosOpcode, unsigned NegOpcode,
7289                                        const SDLoc &DL) {
7290   EVT VT = N0.getValueType();
7291   unsigned EltBits = VT.getScalarSizeInBits();
7292 
7293   // fold (or (shl x0, (*ext y)),
7294   //          (srl x1, (*ext (sub 32, y)))) ->
7295   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
7296   //
7297   // fold (or (shl x0, (*ext (sub 32, y))),
7298   //          (srl x1, (*ext y))) ->
7299   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
7300   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
7301     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
7302                        HasPos ? Pos : Neg);
7303   }
7304 
  // Matching the shift+xor cases, we can't easily use the xor'd shift amount,
  // so for now just use the PosOpcode case if it's legal.
  // TODO: When can we use the NegOpcode case?
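  // For illustration with EltBits == 32: (xor y, 31) == 31 - y for y in
  // [0, 31], so (srl (srl x1, 1), (xor y, 31)) == (srl x1, 32 - y) for
  // y != 0 and 0 for y == 0, which is exactly the x1 contribution of
  // (fshl x0, x1, y).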
7308   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
7309     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
7310       if (Op.getOpcode() != BinOpc)
7311         return false;
7312       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
7313       return Cst && (Cst->getAPIntValue() == Imm);
7314     };
7315 
7316     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
7317     //   -> (fshl x0, x1, y)
7318     if (IsBinOpImm(N1, ISD::SRL, 1) &&
7319         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
7320         InnerPos == InnerNeg.getOperand(0) &&
7321         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
7322       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
7323     }
7324 
7325     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
7326     //   -> (fshr x0, x1, y)
7327     if (IsBinOpImm(N0, ISD::SHL, 1) &&
7328         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7329         InnerNeg == InnerPos.getOperand(0) &&
7330         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7331       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7332     }
7333 
7334     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
7335     //   -> (fshr x0, x1, y)
7336     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
7337     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
7338         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
7339         InnerNeg == InnerPos.getOperand(0) &&
7340         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
7341       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
7342     }
7343   }
7344 
7345   return SDValue();
7346 }
7347 
7348 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
7349 // idioms for rotate, and if the target supports rotation instructions, generate
7350 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
7351 // with different shifted sources.
7352 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
7353   EVT VT = LHS.getValueType();
7354 
7355   // The target must have at least one rotate/funnel flavor.
7356   // We still try to match rotate by constant pre-legalization.
7357   // TODO: Support pre-legalization funnel-shift by constant.
7358   bool HasROTL = hasOperation(ISD::ROTL, VT);
7359   bool HasROTR = hasOperation(ISD::ROTR, VT);
7360   bool HasFSHL = hasOperation(ISD::FSHL, VT);
7361   bool HasFSHR = hasOperation(ISD::FSHR, VT);
7362 
7363   // If the type is going to be promoted and the target has enabled custom
7364   // lowering for rotate, allow matching rotate by non-constants. Only allow
7365   // this for scalar types.
7366   if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
7367                                   TargetLowering::TypePromoteInteger) {
7368     HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
7369     HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
7370   }
7371 
7372   if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7373     return SDValue();
7374 
7375   // Check for truncated rotate.
7376   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
7377       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
7378     assert(LHS.getValueType() == RHS.getValueType());
7379     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
7380       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
7381     }
7382   }
7383 
7384   // Match "(X shl/srl V1) & V2" where V2 may not be present.
7385   SDValue LHSShift;   // The shift.
7386   SDValue LHSMask;    // AND value if any.
7387   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
7388 
7389   SDValue RHSShift;   // The shift.
7390   SDValue RHSMask;    // AND value if any.
7391   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
7392 
7393   // If neither side matched a rotate half, bail
7394   if (!LHSShift && !RHSShift)
7395     return SDValue();
7396 
7397   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
7398   // side of the rotate, so try to handle that here. In all cases we need to
7399   // pass the matched shift from the opposite side to compute the opcode and
7400   // needed shift amount to extract.  We still want to do this if both sides
7401   // matched a rotate half because one half may be a potential overshift that
7402   // can be broken down (ie if InstCombine merged two shl or srl ops into a
7403   // single one).
7404 
7405   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
7406   if (LHSShift)
7407     if (SDValue NewRHSShift =
7408             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
7409       RHSShift = NewRHSShift;
7410   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
7411   if (RHSShift)
7412     if (SDValue NewLHSShift =
7413             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
7414       LHSShift = NewLHSShift;
7415 
7416   // If a side is still missing, nothing else we can do.
7417   if (!RHSShift || !LHSShift)
7418     return SDValue();
7419 
7420   // At this point we've matched or extracted a shift op on each side.
7421 
7422   if (LHSShift.getOpcode() == RHSShift.getOpcode())
7423     return SDValue(); // Shifts must disagree.
7424 
7425   // Canonicalize shl to left side in a shl/srl pair.
7426   if (RHSShift.getOpcode() == ISD::SHL) {
7427     std::swap(LHS, RHS);
7428     std::swap(LHSShift, RHSShift);
7429     std::swap(LHSMask, RHSMask);
7430   }
7431 
7432   unsigned EltSizeInBits = VT.getScalarSizeInBits();
7433   SDValue LHSShiftArg = LHSShift.getOperand(0);
7434   SDValue LHSShiftAmt = LHSShift.getOperand(1);
7435   SDValue RHSShiftArg = RHSShift.getOperand(0);
7436   SDValue RHSShiftAmt = RHSShift.getOperand(1);
7437 
7438   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
7439                                         ConstantSDNode *RHS) {
7440     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
7441   };
7442 
7443   // TODO: Support pre-legalization funnel-shift by constant.
7444   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
7445   if (!IsRotate && !(HasFSHL || HasFSHR)) {
7446     if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
7447         ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7448       // Look for a disguised rotate by constant.
7449       // The common shifted operand X may be hidden inside another 'or'.
7450       SDValue X, Y;
7451       auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
7452         if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
7453           return false;
7454         if (CommonOp == Or.getOperand(0)) {
7455           X = CommonOp;
7456           Y = Or.getOperand(1);
7457           return true;
7458         }
7459         if (CommonOp == Or.getOperand(1)) {
7460           X = CommonOp;
7461           Y = Or.getOperand(0);
7462           return true;
7463         }
7464         return false;
7465       };
7466 
7467       // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
7468       if (matchOr(LHSShiftArg, RHSShiftArg)) {
7469         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7470         SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
7471         return DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
7472       }
7473       // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
7474       if (matchOr(RHSShiftArg, LHSShiftArg)) {
7475         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
7476         SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
7477         return DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
7478       }
7479     }
7480 
7481     return SDValue(); // Requires funnel shift support.
7482   }
7483 
7484   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
7485   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
7486   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
7487   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
7488   // iff C1+C2 == EltSizeInBits
7489   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
7490     SDValue Res;
7491     if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
7492       bool UseROTL = !LegalOperations || HasROTL;
7493       Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
7494                         UseROTL ? LHSShiftAmt : RHSShiftAmt);
7495     } else {
7496       bool UseFSHL = !LegalOperations || HasFSHL;
7497       Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
7498                         RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
7499     }
7500 
7501     // If there is an AND of either shifted operand, apply it to the result.
7502     if (LHSMask.getNode() || RHSMask.getNode()) {
7503       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
7504       SDValue Mask = AllOnes;
7505 
7506       if (LHSMask.getNode()) {
7507         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
7508         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7509                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
7510       }
7511       if (RHSMask.getNode()) {
7512         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7513         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7514                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7515       }
7516 
7517       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7518     }
7519 
7520     return Res;
7521   }
7522 
7523   // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
7524   // shift.
7525   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
7526     return SDValue();
7527 
7528   // If there is a mask here, and we have a variable shift, we can't be sure
7529   // that we're masking out the right stuff.
7530   if (LHSMask.getNode() || RHSMask.getNode())
7531     return SDValue();
7532 
7533   // If the shift amount is sign/zext/any-extended just peel it off.
7534   SDValue LExtOp0 = LHSShiftAmt;
7535   SDValue RExtOp0 = RHSShiftAmt;
7536   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7537        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7538        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7539        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7540       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7541        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7542        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7543        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7544     LExtOp0 = LHSShiftAmt.getOperand(0);
7545     RExtOp0 = RHSShiftAmt.getOperand(0);
7546   }
7547 
7548   if (IsRotate && (HasROTL || HasROTR)) {
7549     SDValue TryL =
7550         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7551                           RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
7552     if (TryL)
7553       return TryL;
7554 
7555     SDValue TryR =
7556         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7557                           LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
7558     if (TryR)
7559       return TryR;
7560   }
7561 
7562   SDValue TryL =
7563       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7564                         LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
7565   if (TryL)
7566     return TryL;
7567 
7568   SDValue TryR =
7569       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7570                         RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
7571   if (TryR)
7572     return TryR;
7573 
7574   return SDValue();
7575 }
7576 
7577 namespace {
7578 
7579 /// Represents known origin of an individual byte in load combine pattern. The
7580 /// value of the byte is either constant zero or comes from memory.
7581 struct ByteProvider {
7582   // For constant zero providers Load is set to nullptr. For memory providers
7583   // Load represents the node which loads the byte from memory.
7584   // ByteOffset is the offset of the byte in the value produced by the load.
7585   LoadSDNode *Load = nullptr;
7586   unsigned ByteOffset = 0;
7587 
7588   ByteProvider() = default;
7589 
7590   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7591     return ByteProvider(Load, ByteOffset);
7592   }
7593 
7594   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7595 
7596   bool isConstantZero() const { return !Load; }
7597   bool isMemory() const { return Load; }
7598 
7599   bool operator==(const ByteProvider &Other) const {
7600     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7601   }
7602 
7603 private:
7604   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7605       : Load(Load), ByteOffset(ByteOffset) {}
7606 };
7607 
7608 } // end anonymous namespace
7609 
7610 /// Recursively traverses the expression calculating the origin of the requested
7611 /// byte of the given value. Returns None if the provider can't be calculated.
7612 ///
/// For all the values except the root of the expression, this verifies that
/// the value has exactly one use; if that's not true, it returns None. This
/// way, if the origin of the byte is returned, it's guaranteed that the
/// values which contribute to the byte are not used outside of this
/// expression.
7617 ///
7618 /// Because the parts of the expression are not allowed to have more than one
7619 /// use this function iterates over trees, not DAGs. So it never visits the same
7620 /// node more than once.
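///
/// An illustrative query: for
///   x = (or (zext (load i8 p)), (shl (zext (load i8 p+1)), 8))
/// requesting byte 1 of x returns the memory provider (load p+1, byte 0),
/// while bytes 2 and 3 resolve to constant zero.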
static Optional<ByteProvider>
7622 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7623                       bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
7625   if (Depth == 10)
7626     return None;
7627 
7628   if (!Root && !Op.hasOneUse())
7629     return None;
7630 
7631   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7632   unsigned BitWidth = Op.getValueSizeInBits();
7633   if (BitWidth % 8 != 0)
7634     return None;
7635   unsigned ByteWidth = BitWidth / 8;
7636   assert(Index < ByteWidth && "invalid index requested");
7637   (void) ByteWidth;
7638 
7639   switch (Op.getOpcode()) {
7640   case ISD::OR: {
7641     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7642     if (!LHS)
7643       return None;
7644     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7645     if (!RHS)
7646       return None;
7647 
7648     if (LHS->isConstantZero())
7649       return RHS;
7650     if (RHS->isConstantZero())
7651       return LHS;
7652     return None;
7653   }
7654   case ISD::SHL: {
7655     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7656     if (!ShiftOp)
7657       return None;
7658 
7659     uint64_t BitShift = ShiftOp->getZExtValue();
7660     if (BitShift % 8 != 0)
7661       return None;
7662     uint64_t ByteShift = BitShift / 8;
7663 
7664     return Index < ByteShift
7665                ? ByteProvider::getConstantZero()
7666                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7667                                        Depth + 1);
7668   }
7669   case ISD::ANY_EXTEND:
7670   case ISD::SIGN_EXTEND:
7671   case ISD::ZERO_EXTEND: {
7672     SDValue NarrowOp = Op->getOperand(0);
7673     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7674     if (NarrowBitWidth % 8 != 0)
7675       return None;
7676     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7677 
7678     if (Index >= NarrowByteWidth)
7679       return Op.getOpcode() == ISD::ZERO_EXTEND
7680                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7681                  : None;
7682     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7683   }
7684   case ISD::BSWAP:
7685     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7686                                  Depth + 1);
7687   case ISD::LOAD: {
7688     auto L = cast<LoadSDNode>(Op.getNode());
7689     if (!L->isSimple() || L->isIndexed())
7690       return None;
7691 
7692     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7693     if (NarrowBitWidth % 8 != 0)
7694       return None;
7695     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7696 
7697     if (Index >= NarrowByteWidth)
7698       return L->getExtensionType() == ISD::ZEXTLOAD
7699                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7700                  : None;
7701     return ByteProvider::getMemory(L, Index);
7702   }
7703   }
7704 
7705   return None;
7706 }
7707 
7708 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7709   return i;
7710 }
7711 
7712 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7713   return BW - i - 1;
7714 }
7715 
// Check if the byte offsets we are looking at match either a big-endian or
// little-endian value load. Return true for big endian, false for little
// endian, and None if the match failed.
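// For example, byte offsets {X, X+1, X+2, X+3} with FirstOffset == X match
// little endian for a 4-byte value, while {X+3, X+2, X+1, X} match big endian.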
7719 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7720                                   int64_t FirstOffset) {
  // Endianness can be decided only when there are at least 2 bytes.
7722   unsigned Width = ByteOffsets.size();
7723   if (Width < 2)
7724     return None;
7725 
7726   bool BigEndian = true, LittleEndian = true;
7727   for (unsigned i = 0; i < Width; i++) {
7728     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7729     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7730     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7731     if (!BigEndian && !LittleEndian)
7732       return None;
7733   }
7734 
7735   assert((BigEndian != LittleEndian) && "It should be either big endian or"
7736                                         "little endian");
7737   return BigEndian;
7738 }
7739 
7740 static SDValue stripTruncAndExt(SDValue Value) {
7741   switch (Value.getOpcode()) {
7742   case ISD::TRUNCATE:
7743   case ISD::ZERO_EXTEND:
7744   case ISD::SIGN_EXTEND:
7745   case ISD::ANY_EXTEND:
7746     return stripTruncAndExt(Value.getOperand(0));
7747   }
7748   return Value;
7749 }
7750 
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
7754 ///
7755 /// Assuming little endian target:
7756 ///  i8 *p = ...
7757 ///  i32 val = ...
7758 ///  p[0] = (val >> 0) & 0xFF;
7759 ///  p[1] = (val >> 8) & 0xFF;
7760 ///  p[2] = (val >> 16) & 0xFF;
7761 ///  p[3] = (val >> 24) & 0xFF;
7762 /// =>
7763 ///  *((i32)p) = val;
7764 ///
7765 ///  i8 *p = ...
7766 ///  i32 val = ...
7767 ///  p[0] = (val >> 24) & 0xFF;
7768 ///  p[1] = (val >> 16) & 0xFF;
7769 ///  p[2] = (val >> 8) & 0xFF;
7770 ///  p[3] = (val >> 0) & 0xFF;
7771 /// =>
7772 ///  *((i32)p) = BSWAP(val);
7773 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7774   // The matching looks for "store (trunc x)" patterns that appear early but are
7775   // likely to be replaced by truncating store nodes during combining.
7776   // TODO: If there is evidence that running this later would help, this
7777   //       limitation could be removed. Legality checks may need to be added
7778   //       for the created store and optional bswap/rotate.
7779   if (LegalOperations || OptLevel == CodeGenOpt::None)
7780     return SDValue();
7781 
7782   // We only handle merging simple stores of 1-4 bytes.
7783   // TODO: Allow unordered atomics when wider type is legal (see D66309)
7784   EVT MemVT = N->getMemoryVT();
7785   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7786       !N->isSimple() || N->isIndexed())
7787     return SDValue();
7788 
7789   // Collect all of the stores in the chain.
7790   SDValue Chain = N->getChain();
7791   SmallVector<StoreSDNode *, 8> Stores = {N};
7792   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
7793     // All stores must be the same size to ensure that we are writing all of the
7794     // bytes in the wide value.
7795     // TODO: We could allow multiple sizes by tracking each stored byte.
7796     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
7797         Store->isIndexed())
7798       return SDValue();
7799     Stores.push_back(Store);
7800     Chain = Store->getChain();
7801   }
7802   // There is no reason to continue if we do not have at least a pair of stores.
7803   if (Stores.size() < 2)
7804     return SDValue();
7805 
7806   // Handle simple types only.
7807   LLVMContext &Context = *DAG.getContext();
7808   unsigned NumStores = Stores.size();
7809   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7810   unsigned WideNumBits = NumStores * NarrowNumBits;
7811   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7812   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7813     return SDValue();
7814 
7815   // Check if all bytes of the source value that we are looking at are stored
7816   // to the same base address. Collect offsets from Base address into OffsetMap.
7817   SDValue SourceValue;
7818   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7819   int64_t FirstOffset = INT64_MAX;
7820   StoreSDNode *FirstStore = nullptr;
7821   Optional<BaseIndexOffset> Base;
7822   for (auto Store : Stores) {
    // All the stores store different parts of the combined source value. A
    // truncate is required to get the partial value.
7825     SDValue Trunc = Store->getValue();
7826     if (Trunc.getOpcode() != ISD::TRUNCATE)
7827       return SDValue();
7828     // Other than the first/last part, a shift operation is required to get the
7829     // offset.
7830     int64_t Offset = 0;
7831     SDValue WideVal = Trunc.getOperand(0);
7832     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7833         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7834       // The shift amount must be a constant multiple of the narrow type.
7835       // It is translated to the offset address in the wide source value "y".
7836       //
7837       // x = srl y, ShiftAmtC
7838       // i8 z = trunc x
7839       // store z, ...
7840       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7841       if (ShiftAmtC % NarrowNumBits != 0)
7842         return SDValue();
7843 
7844       Offset = ShiftAmtC / NarrowNumBits;
7845       WideVal = WideVal.getOperand(0);
7846     }
7847 
7848     // Stores must share the same source value with different offsets.
7849     // Truncate and extends should be stripped to get the single source value.
7850     if (!SourceValue)
7851       SourceValue = WideVal;
7852     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7853       return SDValue();
7854     else if (SourceValue.getValueType() != WideVT) {
7855       if (WideVal.getValueType() == WideVT ||
7856           WideVal.getScalarValueSizeInBits() >
7857               SourceValue.getScalarValueSizeInBits())
7858         SourceValue = WideVal;
7859       // Give up if the source value type is smaller than the store size.
7860       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7861         return SDValue();
7862     }
7863 
7864     // Stores must share the same base address.
7865     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7866     int64_t ByteOffsetFromBase = 0;
7867     if (!Base)
7868       Base = Ptr;
7869     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7870       return SDValue();
7871 
7872     // Remember the first store.
7873     if (ByteOffsetFromBase < FirstOffset) {
7874       FirstStore = Store;
7875       FirstOffset = ByteOffsetFromBase;
7876     }
7877     // Map the offset in the store and the offset in the combined value, and
7878     // early return if it has been set before.
7879     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7880       return SDValue();
7881     OffsetMap[Offset] = ByteOffsetFromBase;
7882   }
7883 
7884   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7885   assert(FirstStore && "First store must be set");
7886 
7887   // Check that a store of the wide type is both allowed and fast on the target
7888   const DataLayout &Layout = DAG.getDataLayout();
7889   bool Fast = false;
7890   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7891                                         *FirstStore->getMemOperand(), &Fast);
7892   if (!Allowed || !Fast)
7893     return SDValue();
7894 
7895   // Check if the pieces of the value are going to the expected places in memory
7896   // to merge the stores.
7897   auto checkOffsets = [&](bool MatchLittleEndian) {
7898     if (MatchLittleEndian) {
7899       for (unsigned i = 0; i != NumStores; ++i)
7900         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7901           return false;
7902     } else { // MatchBigEndian by reversing loop counter.
7903       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7904         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7905           return false;
7906     }
7907     return true;
7908   };
7909 
7910   // Check if the offsets line up for the native data layout of this target.
7911   bool NeedBswap = false;
7912   bool NeedRotate = false;
7913   if (!checkOffsets(Layout.isLittleEndian())) {
7914     // Special-case: check if byte offsets line up for the opposite endian.
7915     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7916       NeedBswap = true;
7917     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7918       NeedRotate = true;
7919     else
7920       return SDValue();
7921   }
7922 
7923   SDLoc DL(N);
7924   if (WideVT != SourceValue.getValueType()) {
7925     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7926            "Unexpected store value to merge");
7927     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7928   }
7929 
  // Before legalization we can introduce illegal bswaps/rotates, which will
  // later be converted to an explicit bswap sequence. This way we end up with
  // a single store and byte shuffling instead of several stores and byte
  // shuffling.
7933   if (NeedBswap) {
7934     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7935   } else if (NeedRotate) {
7936     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7937     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7938     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7939   }
7940 
7941   SDValue NewStore =
7942       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7943                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7944 
7945   // Rely on other DAG combine rules to remove the other individual stores.
7946   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7947   return NewStore;
7948 }
7949 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
7953 ///
7954 /// Assuming little endian target:
7955 ///  i8 *a = ...
7956 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7957 /// =>
7958 ///  i32 val = *((i32)a)
7959 ///
7960 ///  i8 *a = ...
7961 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7962 /// =>
7963 ///  i32 val = BSWAP(*((i32)a))
7964 ///
7965 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7966 /// interact well with the worklist mechanism. When a part of the pattern is
7967 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7968 /// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the
/// address of the t28 load is reassociated, the load combine won't be
/// triggered:
7971 ///             t25: i32 = add t4, Constant:i32<2>
7972 ///           t26: i64 = sign_extend t25
7973 ///        t27: i64 = add t2, t26
7974 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7975 ///     t29: i32 = zero_extend t28
7976 ///   t32: i32 = shl t29, Constant:i8<8>
7977 /// t33: i32 = or t23, t32
7978 /// As a possible fix visitLoad can check if the load can be a part of a load
7979 /// combine pattern and add corresponding OR roots to the worklist.
7980 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7981   assert(N->getOpcode() == ISD::OR &&
7982          "Can only match load combining against OR nodes");
7983 
7984   // Handles simple types only
7985   EVT VT = N->getValueType(0);
7986   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7987     return SDValue();
7988   unsigned ByteWidth = VT.getSizeInBits() / 8;
7989 
7990   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7991   auto MemoryByteOffset = [&] (ByteProvider P) {
7992     assert(P.isMemory() && "Must be a memory byte provider");
7993     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
7994     assert(LoadBitWidth % 8 == 0 &&
7995            "can only analyze providers for individual bytes not bit");
7996     unsigned LoadByteWidth = LoadBitWidth / 8;
7997     return IsBigEndianTarget
7998             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7999             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
8000   };
8001 
8002   Optional<BaseIndexOffset> Base;
8003   SDValue Chain;
8004 
8005   SmallPtrSet<LoadSDNode *, 8> Loads;
8006   Optional<ByteProvider> FirstByteProvider;
8007   int64_t FirstOffset = INT64_MAX;
8008 
8009   // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect byte offsets from the base address in ByteOffsets.
8011   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8012   unsigned ZeroExtendedBytes = 0;
8013   for (int i = ByteWidth - 1; i >= 0; --i) {
8014     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
8015     if (!P)
8016       return SDValue();
8017 
8018     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
8021       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
8022         return SDValue();
8023       continue;
8024     }
8025     assert(P->isMemory() && "provenance should either be memory or zero");
8026 
8027     LoadSDNode *L = P->Load;
8028     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
8029            !L->isIndexed() &&
8030            "Must be enforced by calculateByteProvider");
8031     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
8032 
8033     // All loads must share the same chain
8034     SDValue LChain = L->getChain();
8035     if (!Chain)
8036       Chain = LChain;
8037     else if (Chain != LChain)
8038       return SDValue();
8039 
8040     // Loads must share the same base address
8041     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
8042     int64_t ByteOffsetFromBase = 0;
8043     if (!Base)
8044       Base = Ptr;
8045     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8046       return SDValue();
8047 
8048     // Calculate the offset of the current byte from the base address
8049     ByteOffsetFromBase += MemoryByteOffset(*P);
8050     ByteOffsets[i] = ByteOffsetFromBase;
8051 
8052     // Remember the first byte load
8053     if (ByteOffsetFromBase < FirstOffset) {
8054       FirstByteProvider = P;
8055       FirstOffset = ByteOffsetFromBase;
8056     }
8057 
8058     Loads.insert(L);
8059   }
8060   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
8061          "memory, so there must be at least one load which produces the value");
8062   assert(Base && "Base address of the accessed memory location must be set");
8063   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8064 
8065   bool NeedsZext = ZeroExtendedBytes > 0;
8066 
8067   EVT MemVT =
8068       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
8069 
8070   if (!MemVT.isSimple())
8071     return SDValue();
8072 
  // Before legalize we can introduce too-wide illegal loads which will later be
  // split into legal-sized loads. This enables us to combine an i64 built from
  // i8 loads into a couple of i32 loads on 32-bit targets.
8076   if (LegalOperations &&
8077       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
8078                             MemVT))
8079     return SDValue();
8080 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
8083   Optional<bool> IsBigEndian = isBigEndian(
8084       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
8085   if (!IsBigEndian.hasValue())
8086     return SDValue();
8087 
8088   assert(FirstByteProvider && "must be set");
8089 
  // Ensure that the first byte is loaded from zero offset of the first load,
  // so that the combined value can be loaded from the first load's address.
8092   if (MemoryByteOffset(*FirstByteProvider) != 0)
8093     return SDValue();
8094   LoadSDNode *FirstLoad = FirstByteProvider->Load;
8095 
8096   // The node we are looking at matches with the pattern, check if we can
8097   // replace it with a single (possibly zero-extended) load and bswap + shift if
8098   // needed.
8099 
  // If the load needs a byte swap, check if the target supports it.
8101   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
8102 
  // Before legalize we can introduce illegal bswaps which will later be
8104   // converted to an explicit bswap sequence. This way we end up with a single
8105   // load and byte shuffling instead of several loads and byte shuffling.
8106   // We do not introduce illegal bswaps when zero-extending as this tends to
8107   // introduce too many arithmetic instructions.
8108   if (NeedsBswap && (LegalOperations || NeedsZext) &&
8109       !TLI.isOperationLegal(ISD::BSWAP, VT))
8110     return SDValue();
8111 
8112   // If we need to bswap and zero extend, we have to insert a shift. Check that
8113   // it is legal.
8114   if (NeedsBswap && NeedsZext && LegalOperations &&
8115       !TLI.isOperationLegal(ISD::SHL, VT))
8116     return SDValue();
8117 
8118   // Check that a load of the wide type is both allowed and fast on the target
8119   bool Fast = false;
8120   bool Allowed =
8121       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
8122                              *FirstLoad->getMemOperand(), &Fast);
8123   if (!Allowed || !Fast)
8124     return SDValue();
8125 
8126   SDValue NewLoad =
8127       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
8128                      Chain, FirstLoad->getBasePtr(),
8129                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
8130 
8131   // Transfer chain users from old loads to the new load.
8132   for (LoadSDNode *L : Loads)
8133     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
8134 
8135   if (!NeedsBswap)
8136     return NewLoad;
8137 
8138   SDValue ShiftedLoad =
8139       NeedsZext
8140           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
8141                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
8142                                                    SDLoc(N), LegalOperations))
8143           : NewLoad;
8144   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
8145 }
8146 
8147 // If the target has andn, bsl, or a similar bit-select instruction,
8148 // we want to unfold masked merge, with canonical pattern of:
8149 //   |        A  |  |B|
8150 //   ((x ^ y) & m) ^ y
8151 //    |  D  |
8152 // Into:
8153 //   (x & m) | (y & ~m)
8154 // If y is a constant, m is not a 'not', and the 'andn' does not work with
8155 // immediates, we unfold into a different pattern:
8156 //   ~(~x & m) & (m | y)
8157 // If x is a constant, m is a 'not', and the 'andn' does not work with
8158 // immediates, we unfold into a different pattern:
8159 //   (x | ~m) & ~(~m & ~y)
8160 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
8161 //       the very least that breaks andnpd / andnps patterns, and because those
8162 //       patterns are simplified in IR and shouldn't be created in the DAG
8163 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
8164   assert(N->getOpcode() == ISD::XOR);
8165 
8166   // Don't touch 'not' (i.e. where y = -1).
8167   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
8168     return SDValue();
8169 
8170   EVT VT = N->getValueType(0);
8171 
8172   // There are 3 commutable operators in the pattern,
8173   // so we have to deal with 8 possible variants of the basic pattern.
8174   SDValue X, Y, M;
8175   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
8176     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
8177       return false;
8178     SDValue Xor = And.getOperand(XorIdx);
8179     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
8180       return false;
8181     SDValue Xor0 = Xor.getOperand(0);
8182     SDValue Xor1 = Xor.getOperand(1);
8183     // Don't touch 'not' (i.e. where y = -1).
8184     if (isAllOnesOrAllOnesSplat(Xor1))
8185       return false;
8186     if (Other == Xor0)
8187       std::swap(Xor0, Xor1);
8188     if (Other != Xor1)
8189       return false;
8190     X = Xor0;
8191     Y = Xor1;
8192     M = And.getOperand(XorIdx ? 0 : 1);
8193     return true;
8194   };
8195 
8196   SDValue N0 = N->getOperand(0);
8197   SDValue N1 = N->getOperand(1);
8198   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
8199       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
8200     return SDValue();
8201 
8202   // Don't do anything if the mask is constant. This should not be reachable.
8203   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
8205   if (isa<ConstantSDNode>(M.getNode()))
8206     return SDValue();
8207 
8208   // We can transform if the target has AndNot
8209   if (!TLI.hasAndNot(M))
8210     return SDValue();
8211 
8212   SDLoc DL(N);
8213 
  // If Y is a constant, check that 'andn' works with immediates, unless M is
  // a bitwise not that would already allow ANDN to be used.
8216   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8217     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8218     // If not, we need to do a bit more work to make sure andn is still used.
8219     SDValue NotX = DAG.getNOT(DL, X, VT);
8220     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8221     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8222     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8223     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8224   }
8225 
8226   // If X is a constant and M is a bitwise not, check that 'andn' works with
8227   // immediates.
8228   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8229     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8230     // If not, we need to do a bit more work to make sure andn is still used.
8231     SDValue NotM = M.getOperand(0);
8232     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8233     SDValue NotY = DAG.getNOT(DL, Y, VT);
8234     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8235     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8236     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8237   }
8238 
8239   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8240   SDValue NotM = DAG.getNOT(DL, M, VT);
8241   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8242 
8243   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8244 }
8245 
8246 SDValue DAGCombiner::visitXOR(SDNode *N) {
8247   SDValue N0 = N->getOperand(0);
8248   SDValue N1 = N->getOperand(1);
8249   EVT VT = N0.getValueType();
8250   SDLoc DL(N);
8251 
8252   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8253   if (N0.isUndef() && N1.isUndef())
8254     return DAG.getConstant(0, DL, VT);
8255 
8256   // fold (xor x, undef) -> undef
8257   if (N0.isUndef())
8258     return N0;
8259   if (N1.isUndef())
8260     return N1;
8261 
8262   // fold (xor c1, c2) -> c1^c2
8263   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8264     return C;
8265 
8266   // canonicalize constant to RHS
8267   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8268       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8269     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8270 
8271   // fold vector ops
8272   if (VT.isVector()) {
8273     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8274       return FoldedVOp;
8275 
8276     // fold (xor x, 0) -> x, vector edition
8277     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8278       return N0;
8279   }
8280 
8281   // fold (xor x, 0) -> x
8282   if (isNullConstant(N1))
8283     return N0;
8284 
8285   if (SDValue NewSel = foldBinOpIntoSelect(N))
8286     return NewSel;
8287 
8288   // reassociate xor
8289   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8290     return RXOR;
8291 
8292   // fold !(x cc y) -> (x !cc y)
8293   unsigned N0Opcode = N0.getOpcode();
8294   SDValue LHS, RHS, CC;
8295   if (TLI.isConstTrueVal(N1) &&
8296       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8297     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8298                                                LHS.getValueType());
8299     if (!LegalOperations ||
8300         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8301       switch (N0Opcode) {
8302       default:
8303         llvm_unreachable("Unhandled SetCC Equivalent!");
8304       case ISD::SETCC:
8305         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8306       case ISD::SELECT_CC:
8307         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8308                                N0.getOperand(3), NotCC);
8309       case ISD::STRICT_FSETCC:
8310       case ISD::STRICT_FSETCCS: {
8311         if (N0.hasOneUse()) {
8312           // FIXME Can we handle multiple uses? Could we token factor the chain
8313           // results from the new/old setcc?
8314           SDValue SetCC =
8315               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8316                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8317           CombineTo(N, SetCC);
8318           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8319           recursivelyDeleteUnusedNodes(N0.getNode());
8320           return SDValue(N, 0); // Return N so it doesn't get rechecked!
8321         }
8322         break;
8323       }
8324       }
8325     }
8326   }
8327 
8328   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8329   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8330       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8331     SDValue V = N0.getOperand(0);
8332     SDLoc DL0(N0);
8333     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8334                     DAG.getConstant(1, DL0, V.getValueType()));
8335     AddToWorklist(V.getNode());
8336     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8337   }
8338 
8339   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8340   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8341       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8342     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8343     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8344       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8345       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8346       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8347       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8348       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8349     }
8350   }
8351   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8352   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8353       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8354     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8355     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8356       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8357       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8358       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8359       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8360       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8361     }
8362   }
8363 
8364   // fold (not (neg x)) -> (add X, -1)
8365   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8366   // Y is a constant or the subtract has a single use.
8367   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8368       isNullConstant(N0.getOperand(0))) {
8369     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8370                        DAG.getAllOnesConstant(DL, VT));
8371   }
8372 
8373   // fold (not (add X, -1)) -> (neg X)
8374   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8375       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8376     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8377                        N0.getOperand(0));
8378   }
8379 
8380   // fold (xor (and x, y), y) -> (and (not x), y)
8381   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8382     SDValue X = N0.getOperand(0);
8383     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8384     AddToWorklist(NotX.getNode());
8385     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8386   }
8387 
8388   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8389     ConstantSDNode *XorC = isConstOrConstSplat(N1);
8390     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8391     unsigned BitWidth = VT.getScalarSizeInBits();
8392     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
8394       // shift has been simplified to undef.
8395       uint64_t ShiftAmt = ShiftC->getLimitedValue();
8396       if (ShiftAmt < BitWidth) {
8397         APInt Ones = APInt::getAllOnes(BitWidth);
8398         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8399         if (XorC->getAPIntValue() == Ones) {
8400           // If the xor constant is a shifted -1, do a 'not' before the shift:
8401           // xor (X << ShiftC), XorC --> (not X) << ShiftC
8402           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8403           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8404           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8405         }
8406       }
8407     }
8408   }
8409 
8410   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
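  // Y is all-ones when X is negative and zero otherwise, so the add
  // conditionally subtracts one and the xor conditionally complements; this is
  // the classic branchless integer abs.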
8411   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8412     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8413     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8414     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8415       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8416       SDValue S0 = S.getOperand(0);
8417       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8418         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8419           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8420             return DAG.getNode(ISD::ABS, DL, VT, S0);
8421     }
8422   }
8423 
8424   // fold (xor x, x) -> 0
8425   if (N0 == N1)
8426     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8427 
8428   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8429   // Here is a concrete example of this equivalence:
8430   // i16   x ==  14
8431   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
8432   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8433   //
8434   // =>
8435   //
8436   // i16     ~1      == 0b1111111111111110
8437   // i16 rol(~1, 14) == 0b1011111111111111
8438   //
8439   // Some additional tips to help conceptualize this transform:
8440   // - Try to see the operation as placing a single zero in a value of all ones.
8441   // - There exists no value for x which would allow the result to contain zero.
8442   // - Values of x larger than the bitwidth are undefined and do not require a
8443   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
8445   // A rotate left of ~1 is a nice way of achieving the desired result.
8446   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8447       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8448     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8449                        N0.getOperand(1));
8450   }
8451 
8452   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
8453   if (N0Opcode == N1.getOpcode())
8454     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8455       return V;
8456 
8457   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8458     return R;
8459   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8460     return R;
8461 
8462   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
8463   if (SDValue MM = unfoldMaskedMerge(N))
8464     return MM;
8465 
8466   // Simplify the expression using non-local knowledge.
8467   if (SimplifyDemandedBits(SDValue(N, 0)))
8468     return SDValue(N, 0);
8469 
8470   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8471     return Combined;
8472 
8473   return SDValue();
8474 }
8475 
8476 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8477 /// shift-by-constant operand with identical opcode, we may be able to convert
8478 /// that into 2 independent shifts followed by the logic op. This is a
8479 /// throughput improvement.
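/// For example (assuming all intermediate values have one use):
///   srl (xor (srl X, 3), Y), 2 --> xor (srl X, 5), (srl Y, 2)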
8480 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8481   // Match a one-use bitwise logic op.
8482   SDValue LogicOp = Shift->getOperand(0);
8483   if (!LogicOp.hasOneUse())
8484     return SDValue();
8485 
8486   unsigned LogicOpcode = LogicOp.getOpcode();
8487   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8488       LogicOpcode != ISD::XOR)
8489     return SDValue();
8490 
8491   // Find a matching one-use shift by constant.
8492   unsigned ShiftOpcode = Shift->getOpcode();
8493   SDValue C1 = Shift->getOperand(1);
8494   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8495   assert(C1Node && "Expected a shift with constant operand");
8496   const APInt &C1Val = C1Node->getAPIntValue();
8497   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8498                              const APInt *&ShiftAmtVal) {
8499     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8500       return false;
8501 
8502     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8503     if (!ShiftCNode)
8504       return false;
8505 
8506     // Capture the shifted operand and shift amount value.
8507     ShiftOp = V.getOperand(0);
8508     ShiftAmtVal = &ShiftCNode->getAPIntValue();
8509 
8510     // Shift amount types do not have to match their operand type, so check that
8511     // the constants are the same width.
8512     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8513       return false;
8514 
8515     // The fold is not valid if the sum of the shift values exceeds bitwidth.
8516     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8517       return false;
8518 
8519     return true;
8520   };
8521 
8522   // Logic ops are commutative, so check each operand for a match.
8523   SDValue X, Y;
8524   const APInt *C0Val;
8525   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8526     Y = LogicOp.getOperand(1);
8527   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8528     Y = LogicOp.getOperand(0);
8529   else
8530     return SDValue();
8531 
8532   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8533   SDLoc DL(Shift);
8534   EVT VT = Shift->getValueType(0);
8535   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8536   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8537   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8538   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8539   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8540 }
8541 
8542 /// Handle transforms common to the three shifts, when the shift amount is a
8543 /// constant.
8544 /// We are looking for: (shift being one of shl/sra/srl)
8545 ///   shift (binop X, C0), C1
8546 /// And want to transform into:
8547 ///   binop (shift X, C1), (shift C0, C1)
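/// For example:
///   shl (add X, 5), 2 --> add (shl X, 2), 20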
8548 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8549   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8550 
8551   // Do not turn a 'not' into a regular xor.
8552   if (isBitwiseNot(N->getOperand(0)))
8553     return SDValue();
8554 
8555   // The inner binop must be one-use, since we want to replace it.
8556   SDValue LHS = N->getOperand(0);
8557   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8558     return SDValue();
8559 
8560   // TODO: This is limited to early combining because it may reveal regressions
8561   //       otherwise. But since we just checked a target hook to see if this is
8562   //       desirable, that should have filtered out cases where this interferes
8563   //       with some other pattern matching.
8564   if (!LegalTypes)
8565     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8566       return R;
8567 
8568   // We want to pull some binops through shifts, so that we have (and (shift))
8569   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8570   // thing happens with address calculations, so it's important to canonicalize
8571   // it.
8572   switch (LHS.getOpcode()) {
8573   default:
8574     return SDValue();
8575   case ISD::OR:
8576   case ISD::XOR:
8577   case ISD::AND:
8578     break;
8579   case ISD::ADD:
8580     if (N->getOpcode() != ISD::SHL)
8581       return SDValue(); // only shl(add) not sr[al](add).
8582     break;
8583   }
8584 
  // We also require the RHS of the binop to be a non-opaque constant.
8586   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8587   if (!BinOpCst)
8588     return SDValue();
8589 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is a copy/select. Enable this in other cases once we figure out when it
  // is exactly profitable.
8593   SDValue BinOpLHSVal = LHS.getOperand(0);
8594   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8595                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8596                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8597                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8598   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8599                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8600 
8601   if (!IsShiftByConstant && !IsCopyOrSelect)
8602     return SDValue();
8603 
8604   if (IsCopyOrSelect && N->hasOneUse())
8605     return SDValue();
8606 
8607   // Fold the constants, shifting the binop RHS by the shift amount.
8608   SDLoc DL(N);
8609   EVT VT = N->getValueType(0);
8610   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8611                                N->getOperand(1));
8612   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8613 
8614   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8615                                  N->getOperand(1));
8616   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8617 }
8618 
8619 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8620   assert(N->getOpcode() == ISD::TRUNCATE);
8621   assert(N->getOperand(0).getOpcode() == ISD::AND);
8622 
8623   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
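  // e.g. (truncate:i32 (and X:i64, 255)) -> (and (truncate:i32 X), 255)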
8624   EVT TruncVT = N->getValueType(0);
8625   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8626       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8627     SDValue N01 = N->getOperand(0).getOperand(1);
8628     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8629       SDLoc DL(N);
8630       SDValue N00 = N->getOperand(0).getOperand(0);
8631       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8632       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8633       AddToWorklist(Trunc00.getNode());
8634       AddToWorklist(Trunc01.getNode());
8635       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8636     }
8637   }
8638 
8639   return SDValue();
8640 }
8641 
8642 SDValue DAGCombiner::visitRotate(SDNode *N) {
8643   SDLoc dl(N);
8644   SDValue N0 = N->getOperand(0);
8645   SDValue N1 = N->getOperand(1);
8646   EVT VT = N->getValueType(0);
8647   unsigned Bitsize = VT.getScalarSizeInBits();
8648 
8649   // fold (rot x, 0) -> x
8650   if (isNullOrNullSplat(N1))
8651     return N0;
8652 
8653   // fold (rot x, c) -> x iff (c % BitSize) == 0
8654   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8655     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8656     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8657       return N0;
8658   }
8659 
8660   // fold (rot x, c) -> (rot x, c % BitSize)
8661   bool OutOfRange = false;
8662   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8663     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8664     return true;
8665   };
8666   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8667     EVT AmtVT = N1.getValueType();
8668     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8669     if (SDValue Amt =
8670             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8671       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8672   }
8673 
8674   // rot i16 X, 8 --> bswap X
8675   auto *RotAmtC = isConstOrConstSplat(N1);
8676   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8677       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8678     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8679 
8680   // Simplify the operands using demanded-bits information.
8681   if (SimplifyDemandedBits(SDValue(N, 0)))
8682     return SDValue(N, 0);
8683 
8684   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8685   if (N1.getOpcode() == ISD::TRUNCATE &&
8686       N1.getOperand(0).getOpcode() == ISD::AND) {
8687     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8688       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8689   }
8690 
8691   unsigned NextOp = N0.getOpcode();
8692   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
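  // e.g. (rotl (rotl x, 5), 7) -> (rotl x, (5 + 7) % bitsize)
  //      (rotl (rotr x, 3), 5) -> (rotl x, 2)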
8693   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8694     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8695     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8696     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8697       EVT ShiftVT = C1->getValueType(0);
8698       bool SameSide = (N->getOpcode() == NextOp);
8699       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8700       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8701               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8702         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8703         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8704             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8705         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8706                            CombinedShiftNorm);
8707       }
8708     }
8709   }
8710   return SDValue();
8711 }
8712 
8713 SDValue DAGCombiner::visitSHL(SDNode *N) {
8714   SDValue N0 = N->getOperand(0);
8715   SDValue N1 = N->getOperand(1);
8716   if (SDValue V = DAG.simplifyShift(N0, N1))
8717     return V;
8718 
8719   EVT VT = N0.getValueType();
8720   EVT ShiftVT = N1.getValueType();
8721   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8722 
8723   // fold (shl c1, c2) -> c1<<c2
8724   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8725     return C;
8726 
8727   // fold vector ops
8728   if (VT.isVector()) {
8729     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8730       return FoldedVOp;
8731 
8732     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8733     // If setcc produces all-one true value then:
8734     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8735     if (N1CV && N1CV->isConstant()) {
8736       if (N0.getOpcode() == ISD::AND) {
8737         SDValue N00 = N0->getOperand(0);
8738         SDValue N01 = N0->getOperand(1);
8739         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8740 
8741         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8742             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8743                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8744           if (SDValue C =
8745                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8746             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8747         }
8748       }
8749     }
8750   }
8751 
8752   if (SDValue NewSel = foldBinOpIntoSelect(N))
8753     return NewSel;
8754 
8755   // if (shl x, c) is known to be zero, return 0
8756   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8757     return DAG.getConstant(0, SDLoc(N), VT);
8758 
8759   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8760   if (N1.getOpcode() == ISD::TRUNCATE &&
8761       N1.getOperand(0).getOpcode() == ISD::AND) {
8762     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8763       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8764   }
8765 
8766   if (SimplifyDemandedBits(SDValue(N, 0)))
8767     return SDValue(N, 0);
8768 
8769   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8770   if (N0.getOpcode() == ISD::SHL) {
8771     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8772                                           ConstantSDNode *RHS) {
8773       APInt c1 = LHS->getAPIntValue();
8774       APInt c2 = RHS->getAPIntValue();
8775       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8776       return (c1 + c2).uge(OpSizeInBits);
8777     };
8778     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8779       return DAG.getConstant(0, SDLoc(N), VT);
8780 
8781     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8782                                        ConstantSDNode *RHS) {
8783       APInt c1 = LHS->getAPIntValue();
8784       APInt c2 = RHS->getAPIntValue();
8785       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8786       return (c1 + c2).ult(OpSizeInBits);
8787     };
8788     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8789       SDLoc DL(N);
8790       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8791       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8792     }
8793   }
8794 
8795   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8796   // For this to be valid, the second form must not preserve any of the bits
8797   // that are shifted out by the inner shift in the first form.  This means
8798   // the outer shift size must be >= the number of bits added by the ext.
8799   // As a corollary, we don't care what kind of ext it is.
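  // For example:
  //   shl (zext (shl x:i16, 2) to i32), 20 --> shl (zext x to i32), 22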
8800   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8801        N0.getOpcode() == ISD::ANY_EXTEND ||
8802        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8803       N0.getOperand(0).getOpcode() == ISD::SHL) {
8804     SDValue N0Op0 = N0.getOperand(0);
8805     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8806     EVT InnerVT = N0Op0.getValueType();
8807     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8808 
8809     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8810                                                          ConstantSDNode *RHS) {
8811       APInt c1 = LHS->getAPIntValue();
8812       APInt c2 = RHS->getAPIntValue();
8813       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8814       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8815              (c1 + c2).uge(OpSizeInBits);
8816     };
8817     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8818                                   /*AllowUndefs*/ false,
8819                                   /*AllowTypeMismatch*/ true))
8820       return DAG.getConstant(0, SDLoc(N), VT);
8821 
8822     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8823                                                       ConstantSDNode *RHS) {
8824       APInt c1 = LHS->getAPIntValue();
8825       APInt c2 = RHS->getAPIntValue();
8826       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8827       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8828              (c1 + c2).ult(OpSizeInBits);
8829     };
8830     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8831                                   /*AllowUndefs*/ false,
8832                                   /*AllowTypeMismatch*/ true)) {
8833       SDLoc DL(N);
8834       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8835       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8836       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8837       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8838     }
8839   }
8840 
8841   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8842   // Only fold this if the inner zext has no other uses to avoid increasing
8843   // the total number of instructions.
8844   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8845       N0.getOperand(0).getOpcode() == ISD::SRL) {
8846     SDValue N0Op0 = N0.getOperand(0);
8847     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8848 
8849     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8850       APInt c1 = LHS->getAPIntValue();
8851       APInt c2 = RHS->getAPIntValue();
8852       zeroExtendToMatch(c1, c2);
8853       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8854     };
8855     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8856                                   /*AllowUndefs*/ false,
8857                                   /*AllowTypeMismatch*/ true)) {
8858       SDLoc DL(N);
8859       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8860       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8861       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8862       AddToWorklist(NewSHL.getNode());
8863       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8864     }
8865   }
8866 
8867   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8868   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8869   // TODO - support non-uniform vector shift amounts.
8870   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8871   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8872       N0->getFlags().hasExact()) {
8873     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8874       uint64_t C1 = N0C1->getZExtValue();
8875       uint64_t C2 = N1C->getZExtValue();
8876       SDLoc DL(N);
8877       if (C1 <= C2)
8878         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8879                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8880       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8881                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8882     }
8883   }
8884 
  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
8887   // Only fold this if the inner shift has no other uses -- if it does, folding
8888   // this will increase the total number of instructions.
8889   // TODO - drop hasOneUse requirement if c1 == c2?
8890   // TODO - support non-uniform vector shift amounts.
8891   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8892       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8893     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8894       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8895         uint64_t c1 = N0C1->getZExtValue();
8896         uint64_t c2 = N1C->getZExtValue();
8897         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8898         SDValue Shift;
8899         if (c2 > c1) {
8900           Mask <<= c2 - c1;
8901           SDLoc DL(N);
8902           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8903                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8904         } else {
8905           Mask.lshrInPlace(c1 - c2);
8906           SDLoc DL(N);
8907           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8908                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8909         }
8910         SDLoc DL(N0);
8911         return DAG.getNode(ISD::AND, DL, VT, Shift,
8912                            DAG.getConstant(Mask, DL, VT));
8913       }
8914     }
8915   }
8916 
8917   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
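  // e.g. with i32: shl (sra x, 3), 3 --> and x, 0xFFFFFFF8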
8918   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8919       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8920     SDLoc DL(N);
8921     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8922     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8923     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8924   }
8925 
8926   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8927   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of the fold done on multiply, except that a mul by a power of 2
  // is turned into a shift.
8930   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8931       N0->hasOneUse() &&
8932       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8933       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8934       TLI.isDesirableToCommuteWithShift(N, Level)) {
8935     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8936     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8937     AddToWorklist(Shl0.getNode());
8938     AddToWorklist(Shl1.getNode());
8939     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8940   }
8941 
8942   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8943   if (N0.getOpcode() == ISD::MUL && N0->hasOneUse() &&
8944       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8945       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8946     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8947     if (isConstantOrConstantVector(Shl))
8948       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8949   }
8950 
8951   if (N1C && !N1C->isOpaque())
8952     if (SDValue NewSHL = visitShiftByConstant(N))
8953       return NewSHL;
8954 
8955   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8956   if (N0.getOpcode() == ISD::VSCALE)
8957     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8958       const APInt &C0 = N0.getConstantOperandAPInt(0);
8959       const APInt &C1 = NC1->getAPIntValue();
8960       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8961     }
8962 
8963   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8964   APInt ShlVal;
8965   if (N0.getOpcode() == ISD::STEP_VECTOR)
8966     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8967       const APInt &C0 = N0.getConstantOperandAPInt(0);
8968       if (ShlVal.ult(C0.getBitWidth())) {
8969         APInt NewStep = C0 << ShlVal;
8970         return DAG.getStepVector(SDLoc(N), VT, NewStep);
8971       }
8972     }
8973 
8974   return SDValue();
8975 }
8976 
8977 // Transform a right shift of a multiply into a multiply-high.
8978 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8981 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8982                                   const TargetLowering &TLI) {
8983   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8984          "SRL or SRA node is required here!");
8985 
8986   // Check the shift amount. Proceed with the transformation if the shift
8987   // amount is constant.
8988   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8989   if (!ShiftAmtSrc)
8990     return SDValue();
8991 
8992   SDLoc DL(N);
8993 
8994   // The operation feeding into the shift must be a multiply.
8995   SDValue ShiftOperand = N->getOperand(0);
8996   if (ShiftOperand.getOpcode() != ISD::MUL)
8997     return SDValue();
8998 
8999   // Both operands must be equivalent extend nodes.
9000   SDValue LeftOp = ShiftOperand.getOperand(0);
9001   SDValue RightOp = ShiftOperand.getOperand(1);
9002 
9003   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9004   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9005 
9006   if (!IsSignExt && !IsZeroExt)
9007     return SDValue();
9008 
9009   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9010   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9011 
9012   SDValue MulhRightOp;
9013   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9014     unsigned ActiveBits = IsSignExt
9015                               ? Constant->getAPIntValue().getMinSignedBits()
9016                               : Constant->getAPIntValue().getActiveBits();
9017     if (ActiveBits > NarrowVTSize)
9018       return SDValue();
9019     MulhRightOp = DAG.getConstant(
9020         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9021         NarrowVT);
9022   } else {
9023     if (LeftOp.getOpcode() != RightOp.getOpcode())
9024       return SDValue();
9025     // Check that the two extend nodes are the same type.
9026     if (NarrowVT != RightOp.getOperand(0).getValueType())
9027       return SDValue();
9028     MulhRightOp = RightOp.getOperand(0);
9029   }
9030 
9031   EVT WideVT = LeftOp.getValueType();
9032   // Proceed with the transformation if the wide types match.
9033   assert((WideVT == RightOp.getValueType()) &&
9034          "Cannot have a multiply node with two different operand types.");
9035 
9036   // Proceed with the transformation if the wide type is twice as large
9037   // as the narrow type.
9038   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9039     return SDValue();
9040 
9041   // Check the shift amount with the narrow type size.
9042   // Proceed with the transformation if the shift amount is the width
9043   // of the narrow type.
9044   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9045   if (ShiftAmt != NarrowVTSize)
9046     return SDValue();
9047 
9048   // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
9050   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
9051 
9052   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
9053   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
9054     return SDValue();
9055 
9056   SDValue Result =
9057       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
9058   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
9059                                      : DAG.getZExtOrTrunc(Result, DL, WideVT));
9060 }
9061 
9062 SDValue DAGCombiner::visitSRA(SDNode *N) {
9063   SDValue N0 = N->getOperand(0);
9064   SDValue N1 = N->getOperand(1);
9065   if (SDValue V = DAG.simplifyShift(N0, N1))
9066     return V;
9067 
9068   EVT VT = N0.getValueType();
9069   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9070 
  // fold (sra c1, c2) -> c1 >>s c2
9072   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
9073     return C;
9074 
9075   // Arithmetic shifting an all-sign-bit value is a no-op.
9076   // fold (sra 0, x) -> 0
9077   // fold (sra -1, x) -> -1
9078   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
9079     return N0;
9080 
9081   // fold vector ops
9082   if (VT.isVector())
9083     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9084       return FoldedVOp;
9085 
9086   if (SDValue NewSel = foldBinOpIntoSelect(N))
9087     return NewSel;
9088 
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
  // supports sext_inreg.
9091   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9092   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
9093     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
9094     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
9095     if (VT.isVector())
9096       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
9097                                VT.getVectorElementCount());
9098     if (!LegalOperations ||
9099         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
9100         TargetLowering::Legal)
9101       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9102                          N0.getOperand(0), DAG.getValueType(ExtVT));
9103     // Even if we can't convert to sext_inreg, we might be able to remove
9104     // this shift pair if the input is already sign extended.
9105     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
9106       return N0.getOperand(0);
9107   }
9108 
9109   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
9110   // clamp (add c1, c2) to max shift.
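  // e.g. with i32: sra (sra x, 20), 20 --> sra x, 31 (sum clamped to 31)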
9111   if (N0.getOpcode() == ISD::SRA) {
9112     SDLoc DL(N);
9113     EVT ShiftVT = N1.getValueType();
9114     EVT ShiftSVT = ShiftVT.getScalarType();
9115     SmallVector<SDValue, 16> ShiftValues;
9116 
9117     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9118       APInt c1 = LHS->getAPIntValue();
9119       APInt c2 = RHS->getAPIntValue();
9120       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9121       APInt Sum = c1 + c2;
9122       unsigned ShiftSum =
9123           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
9124       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
9125       return true;
9126     };
9127     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
9128       SDValue ShiftValue;
9129       if (N1.getOpcode() == ISD::BUILD_VECTOR)
9130         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
9131       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
9132         assert(ShiftValues.size() == 1 &&
9133                "Expected matchBinaryPredicate to return one element for "
9134                "SPLAT_VECTORs");
9135         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
9136       } else
9137         ShiftValue = ShiftValues[0];
9138       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
9139     }
9140   }
9141 
9142   // fold (sra (shl X, m), (sub result_size, n))
9143   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
9144   // result_size - n != m.
  // If truncate is free for the target, sext(shl) is likely to result in
  // better code.
9147   if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
9149     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
9150     if (N01C) {
9151       LLVMContext &Ctx = *DAG.getContext();
9152       // Determine what the truncate's result bitsize and type would be.
9153       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
9154 
9155       if (VT.isVector())
9156         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9157 
9158       // Determine the residual right-shift amount.
9159       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
9160 
9161       // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated-to type is legal, sign_extend is legal
9163       // on that type, and the truncate to that type is both legal and free,
9164       // perform the transform.
9165       if ((ShiftAmt > 0) &&
9166           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
9167           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
9168           TLI.isTruncateFree(VT, TruncVT)) {
9169         SDLoc DL(N);
9170         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
9171             getShiftAmountTy(N0.getOperand(0).getValueType()));
9172         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
9173                                     N0.getOperand(0), Amt);
9174         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
9175                                     Shift);
9176         return DAG.getNode(ISD::SIGN_EXTEND, DL,
9177                            N->getValueType(0), Trunc);
9178       }
9179     }
9180   }
9181 
9182   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
9183   //   sra (add (shl X, N1C), AddC), N1C -->
9184   //   sext (add (trunc X to (width - N1C)), AddC')
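  // For example, with i32 and N1C == 24:
  //   sra (add (shl X, 24), 0x12345678), 24
  //     --> sext (add (trunc X to i8), 0x12)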
9185   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
9186       N0.getOperand(0).getOpcode() == ISD::SHL &&
9187       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
9188     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
9189       SDValue Shl = N0.getOperand(0);
9190       // Determine what the truncate's type would be and ask the target if that
9191       // is a free operation.
9192       LLVMContext &Ctx = *DAG.getContext();
9193       unsigned ShiftAmt = N1C->getZExtValue();
9194       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
9195       if (VT.isVector())
9196         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9197 
9198       // TODO: The simple type check probably belongs in the default hook
9199       //       implementation and/or target-specific overrides (because
9200       //       non-simple types likely require masking when legalized), but that
9201       //       restriction may conflict with other transforms.
9202       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
9203           TLI.isTruncateFree(VT, TruncVT)) {
9204         SDLoc DL(N);
9205         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
9206         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
9207                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
9208         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
9209         return DAG.getSExtOrTrunc(Add, DL, VT);
9210       }
9211     }
9212   }
9213 
9214   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9215   if (N1.getOpcode() == ISD::TRUNCATE &&
9216       N1.getOperand(0).getOpcode() == ISD::AND) {
9217     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9218       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9219   }
9220 
9221   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9222   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9223   //      if c1 is equal to the number of bits the trunc removes
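  // e.g. (illustrative, i64 truncated to i32, so c1 == 32):
  //   (sra (trunc (srl x, 32)), 5) -> (trunc (sra x, 37))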
9224   // TODO - support non-uniform vector shift amounts.
9225   if (N0.getOpcode() == ISD::TRUNCATE &&
9226       (N0.getOperand(0).getOpcode() == ISD::SRL ||
9227        N0.getOperand(0).getOpcode() == ISD::SRA) &&
9228       N0.getOperand(0).hasOneUse() &&
9229       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9230     SDValue N0Op0 = N0.getOperand(0);
9231     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9232       EVT LargeVT = N0Op0.getValueType();
9233       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9234       if (LargeShift->getAPIntValue() == TruncBits) {
9235         SDLoc DL(N);
9236         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
9237                                       getShiftAmountTy(LargeVT));
9238         SDValue SRA =
9239             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9240         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9241       }
9242     }
9243   }
9244 
9245   // Simplify, based on bits shifted out of the LHS.
9246   if (SimplifyDemandedBits(SDValue(N, 0)))
9247     return SDValue(N, 0);
9248 
9249   // If the sign bit is known to be zero, switch this to a SRL.
9250   if (DAG.SignBitIsZero(N0))
9251     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9252 
9253   if (N1C && !N1C->isOpaque())
9254     if (SDValue NewSRA = visitShiftByConstant(N))
9255       return NewSRA;
9256 
9257   // Try to transform this shift into a multiply-high if
9258   // it matches the appropriate pattern detected in combineShiftToMULH.
9259   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9260     return MULH;
9261 
9262   // Attempt to convert a sra of a load into a narrower sign-extending load.
9263   if (SDValue NarrowLoad = reduceLoadWidth(N))
9264     return NarrowLoad;
9265 
9266   return SDValue();
9267 }
9268 
9269 SDValue DAGCombiner::visitSRL(SDNode *N) {
9270   SDValue N0 = N->getOperand(0);
9271   SDValue N1 = N->getOperand(1);
9272   if (SDValue V = DAG.simplifyShift(N0, N1))
9273     return V;
9274 
9275   EVT VT = N0.getValueType();
9276   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9277 
9278   // fold (srl c1, c2) -> c1 >>u c2
9279   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9280     return C;
9281 
9282   // fold vector ops
9283   if (VT.isVector())
9284     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9285       return FoldedVOp;
9286 
9287   if (SDValue NewSel = foldBinOpIntoSelect(N))
9288     return NewSel;
9289 
9290   // if (srl x, c) is known to be zero, return 0
9291   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9292   if (N1C &&
9293       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9294     return DAG.getConstant(0, SDLoc(N), VT);
9295 
9296   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
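  // e.g. (illustrative): (srl (srl x:i32, 3), 4) -> (srl x, 7), while
  //      (srl (srl x:i8, 4), 5) -> 0 since 4 + 5 >= 8.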
9297   if (N0.getOpcode() == ISD::SRL) {
9298     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9299                                           ConstantSDNode *RHS) {
9300       APInt c1 = LHS->getAPIntValue();
9301       APInt c2 = RHS->getAPIntValue();
9302       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9303       return (c1 + c2).uge(OpSizeInBits);
9304     };
9305     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9306       return DAG.getConstant(0, SDLoc(N), VT);
9307 
9308     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9309                                        ConstantSDNode *RHS) {
9310       APInt c1 = LHS->getAPIntValue();
9311       APInt c2 = RHS->getAPIntValue();
9312       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9313       return (c1 + c2).ult(OpSizeInBits);
9314     };
9315     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9316       SDLoc DL(N);
9317       EVT ShiftVT = N1.getValueType();
9318       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9319       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9320     }
9321   }
9322 
9323   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9324       N0.getOperand(0).getOpcode() == ISD::SRL) {
9325     SDValue InnerShift = N0.getOperand(0);
9326     // TODO - support non-uniform vector shift amounts.
9327     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9328       uint64_t c1 = N001C->getZExtValue();
9329       uint64_t c2 = N1C->getZExtValue();
9330       EVT InnerShiftVT = InnerShift.getValueType();
9331       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9332       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9333       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
9335       if (c1 + OpSizeInBits == InnerShiftSize) {
9336         SDLoc DL(N);
9337         if (c1 + c2 >= InnerShiftSize)
9338           return DAG.getConstant(0, DL, VT);
9339         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9340         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9341                                        InnerShift.getOperand(0), NewShiftAmt);
9342         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9343       }
9344       // In the more general case, we can clear the high bits after the shift:
9345       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
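      // e.g. (illustrative, i64 truncated to i32 with c1 == 8, c2 == 4):
      //   srl (trunc (srl x, 8)), 4
      //     --> trunc (and (srl x, 12), 0x0FFFFFFF)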
9346       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9347           c1 + c2 < InnerShiftSize) {
9348         SDLoc DL(N);
9349         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9350         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9351                                        InnerShift.getOperand(0), NewShiftAmt);
9352         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9353                                                             OpSizeInBits - c2),
9354                                        DL, InnerShiftVT);
9355         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9356         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9357       }
9358     }
9359   }
9360 
9361   // fold (srl (shl x, c), c) -> (and x, cst2)
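  // e.g. (illustrative, i32): (srl (shl x, 4), 4) -> (and x, 0x0FFFFFFF)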
9362   // TODO - (srl (shl x, c1), c2).
9363   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
9364       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
9365     SDLoc DL(N);
9366     SDValue Mask =
9367         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
9368     AddToWorklist(Mask.getNode());
9369     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
9370   }
9371 
9372   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
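  // e.g. (illustrative, i16 any-extended to i32 with c == 4):
  //   (srl (anyext x), 4) -> (and (anyext (srl x, 4)), 0x0FFFFFFF)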
9373   // TODO - support non-uniform vector shift amounts.
9374   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9375     // Shifting in all undef bits?
9376     EVT SmallVT = N0.getOperand(0).getValueType();
9377     unsigned BitSize = SmallVT.getScalarSizeInBits();
9378     if (N1C->getAPIntValue().uge(BitSize))
9379       return DAG.getUNDEF(VT);
9380 
9381     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9382       uint64_t ShiftAmt = N1C->getZExtValue();
9383       SDLoc DL0(N0);
9384       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9385                                        N0.getOperand(0),
9386                           DAG.getConstant(ShiftAmt, DL0,
9387                                           getShiftAmountTy(SmallVT)));
9388       AddToWorklist(SmallShift.getNode());
9389       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9390       SDLoc DL(N);
9391       return DAG.getNode(ISD::AND, DL, VT,
9392                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9393                          DAG.getConstant(Mask, DL, VT));
9394     }
9395   }
9396 
9397   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
9398   // bit, which is unmodified by sra.
9399   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9400     if (N0.getOpcode() == ISD::SRA)
9401       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9402   }
9403 
  // fold (srl (ctlz x), log2(BW)) -> x  iff x has one bit set (the low bit).
9405   if (N1C && N0.getOpcode() == ISD::CTLZ &&
9406       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9407     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9408 
9409     // If any of the input bits are KnownOne, then the input couldn't be all
9410     // zeros, thus the result of the srl will always be zero.
9411     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9412 
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is the bit width and the result of the shift is 1.
9415     APInt UnknownBits = ~Known.Zero;
9416     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9417 
9418     // Otherwise, check to see if there is exactly one bit input to the ctlz.
9419     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node. If this bit is set, the SRL will
      // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
      // SRL/XOR pair, which is likely to simplify more.
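      // e.g. (illustrative, i32 where only bit 3 of x may be set):
      //   (srl (ctlz x), 5) -> (xor (srl x, 3), 1)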
9424       unsigned ShAmt = UnknownBits.countTrailingZeros();
9425       SDValue Op = N0.getOperand(0);
9426 
9427       if (ShAmt) {
9428         SDLoc DL(N0);
9429         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9430                   DAG.getConstant(ShAmt, DL,
9431                                   getShiftAmountTy(Op.getValueType())));
9432         AddToWorklist(Op.getNode());
9433       }
9434 
9435       SDLoc DL(N);
9436       return DAG.getNode(ISD::XOR, DL, VT,
9437                          Op, DAG.getConstant(1, DL, VT));
9438     }
9439   }
9440 
9441   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9442   if (N1.getOpcode() == ISD::TRUNCATE &&
9443       N1.getOperand(0).getOpcode() == ISD::AND) {
9444     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9445       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9446   }
9447 
9448   // fold operands of srl based on knowledge that the low bits are not
9449   // demanded.
9450   if (SimplifyDemandedBits(SDValue(N, 0)))
9451     return SDValue(N, 0);
9452 
9453   if (N1C && !N1C->isOpaque())
9454     if (SDValue NewSRL = visitShiftByConstant(N))
9455       return NewSRL;
9456 
9457   // Attempt to convert a srl of a load into a narrower zero-extending load.
9458   if (SDValue NarrowLoad = reduceLoadWidth(N))
9459     return NarrowLoad;
9460 
9461   // Here is a common situation. We want to optimize:
9462   //
9463   //   %a = ...
9464   //   %b = and i32 %a, 2
9465   //   %c = srl i32 %b, 1
9466   //   brcond i32 %c ...
9467   //
9468   // into
9469   //
9470   //   %a = ...
9471   //   %b = and %a, 2
9472   //   %c = setcc eq %b, 0
9473   //   brcond %c ...
9474   //
  // However, after the source operand of the SRL is optimized into an AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND
  // into the worklist.
9478   if (N->hasOneUse()) {
9479     SDNode *Use = *N->use_begin();
9480     if (Use->getOpcode() == ISD::BRCOND)
9481       AddToWorklist(Use);
9482     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
9484       Use = *Use->use_begin();
9485       if (Use->getOpcode() == ISD::BRCOND)
9486         AddToWorklist(Use);
9487     }
9488   }
9489 
9490   // Try to transform this shift into a multiply-high if
9491   // it matches the appropriate pattern detected in combineShiftToMULH.
9492   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9493     return MULH;
9494 
9495   return SDValue();
9496 }
9497 
9498 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9499   EVT VT = N->getValueType(0);
9500   SDValue N0 = N->getOperand(0);
9501   SDValue N1 = N->getOperand(1);
9502   SDValue N2 = N->getOperand(2);
9503   bool IsFSHL = N->getOpcode() == ISD::FSHL;
9504   unsigned BitWidth = VT.getScalarSizeInBits();
9505 
9506   // fold (fshl N0, N1, 0) -> N0
9507   // fold (fshr N0, N1, 0) -> N1
9508   if (isPowerOf2_32(BitWidth))
9509     if (DAG.MaskedValueIsZero(
9510             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9511       return IsFSHL ? N0 : N1;
9512 
9513   auto IsUndefOrZero = [](SDValue V) {
9514     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9515   };
9516 
9517   // TODO - support non-uniform vector shift amounts.
9518   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9519     EVT ShAmtTy = N2.getValueType();
9520 
9521     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
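    // e.g. (illustrative, i32): fshl(a, b, 37) -> fshl(a, b, 5)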
9522     if (Cst->getAPIntValue().uge(BitWidth)) {
9523       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9524       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9525                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9526     }
9527 
9528     unsigned ShAmt = Cst->getZExtValue();
9529     if (ShAmt == 0)
9530       return IsFSHL ? N0 : N1;
9531 
9532     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9533     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9534     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9535     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
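    // e.g. (illustrative, i32 with C == 8): fshl(zero, N1, 8) -> lshr(N1, 24)
    // and fshr(N0, zero, 8) -> shl(N0, 24)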
9536     if (IsUndefOrZero(N0))
9537       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9538                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9539                                          SDLoc(N), ShAmtTy));
9540     if (IsUndefOrZero(N1))
9541       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9542                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9543                                          SDLoc(N), ShAmtTy));
9544 
9545     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9546     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9547     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9549     // TODO - permit LHS EXTLOAD if extensions are shifted out.
9550     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9551         !DAG.getDataLayout().isBigEndian()) {
9552       auto *LHS = dyn_cast<LoadSDNode>(N0);
9553       auto *RHS = dyn_cast<LoadSDNode>(N1);
9554       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9555           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9556           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9557           ISD::isNON_EXTLoad(LHS)) {
9558         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9559           SDLoc DL(RHS);
9560           uint64_t PtrOff =
9561               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9562           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9563           bool Fast = false;
9564           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9565                                      RHS->getAddressSpace(), NewAlign,
9566                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9567               Fast) {
9568             SDValue NewPtr = DAG.getMemBasePlusOffset(
9569                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9570             AddToWorklist(NewPtr.getNode());
9571             SDValue Load = DAG.getLoad(
9572                 VT, DL, RHS->getChain(), NewPtr,
9573                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9574                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9575             // Replace the old load's chain with the new load's chain.
9576             WorklistRemover DeadNodes(*this);
9577             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9578             return Load;
9579           }
9580         }
9581       }
9582     }
9583   }
9584 
9585   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9586   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
9588   // TODO: when is it worth doing SUB(BW, N2) as well?
9589   if (isPowerOf2_32(BitWidth)) {
9590     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9591     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9592       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9593     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9594       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9595   }
9596 
9597   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9598   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the funnel
  // shift is legal as well, we might be better off avoiding non-constant
  // (BW - N2).
9601   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9602   if (N0 == N1 && hasOperation(RotOpc, VT))
9603     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9604 
9605   // Simplify, based on bits shifted out of N0/N1.
9606   if (SimplifyDemandedBits(SDValue(N, 0)))
9607     return SDValue(N, 0);
9608 
9609   return SDValue();
9610 }
9611 
9612 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9613   SDValue N0 = N->getOperand(0);
9614   SDValue N1 = N->getOperand(1);
9615   if (SDValue V = DAG.simplifyShift(N0, N1))
9616     return V;
9617 
9618   EVT VT = N0.getValueType();
9619 
9620   // fold (*shlsat c1, c2) -> c1<<c2
9621   if (SDValue C =
9622           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9623     return C;
9624 
9625   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9626 
9627   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
9628     // fold (sshlsat x, c) -> (shl x, c)
9629     if (N->getOpcode() == ISD::SSHLSAT && N1C &&
9630         N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
9631       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9632 
9633     // fold (ushlsat x, c) -> (shl x, c)
9634     if (N->getOpcode() == ISD::USHLSAT && N1C &&
9635         N1C->getAPIntValue().ule(
9636             DAG.computeKnownBits(N0).countMinLeadingZeros()))
9637       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9638   }
9639 
9640   return SDValue();
9641 }
9642 
// Given an ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates a UABD/SABD instruction.
9646 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9647                                const TargetLowering &TLI) {
9648   SDValue AbsOp1 = N->getOperand(0);
9649   SDValue Op0, Op1;
9650 
9651   if (AbsOp1.getOpcode() != ISD::SUB)
9652     return SDValue();
9653 
9654   Op0 = AbsOp1.getOperand(0);
9655   Op1 = AbsOp1.getOperand(1);
9656 
9657   unsigned Opc0 = Op0.getOpcode();
9658   // Check if the operands of the sub are (zero|sign)-extended.
9659   if (Opc0 != Op1.getOpcode() ||
9660       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9661     return SDValue();
9662 
9663   EVT VT = N->getValueType(0);
9664   EVT VT1 = Op0.getOperand(0).getValueType();
9665   EVT VT2 = Op1.getOperand(0).getValueType();
9666   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9667 
9668   // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
9669   // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
9670   // NOTE: Extensions must be equivalent.
9671   if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
9672     Op0 = Op0.getOperand(0);
9673     Op1 = Op1.getOperand(0);
9674     SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
9675     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
9676   }
9677 
9678   // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
9679   // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
9680   if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
9681     return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
9682 
9683   return SDValue();
9684 }
9685 
9686 SDValue DAGCombiner::visitABS(SDNode *N) {
9687   SDValue N0 = N->getOperand(0);
9688   EVT VT = N->getValueType(0);
9689 
9690   // fold (abs c1) -> c2
9691   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9692     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9693   // fold (abs (abs x)) -> (abs x)
9694   if (N0.getOpcode() == ISD::ABS)
9695     return N0;
9696   // fold (abs x) -> x iff not-negative
9697   if (DAG.SignBitIsZero(N0))
9698     return N0;
9699 
9700   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9701     return ABD;
9702 
9703   return SDValue();
9704 }
9705 
9706 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9707   SDValue N0 = N->getOperand(0);
9708   EVT VT = N->getValueType(0);
9709   SDLoc DL(N);
9710 
9711   // fold (bswap c1) -> c2
9712   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9713     return DAG.getNode(ISD::BSWAP, DL, VT, N0);
9714   // fold (bswap (bswap x)) -> x
9715   if (N0.getOpcode() == ISD::BSWAP)
9716     return N0.getOperand(0);
9717 
9718   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9719   // isn't supported, it will be expanded to bswap followed by a manual reversal
9720   // of bits in each byte. By placing bswaps before bitreverse, we can remove
9721   // the two bswaps if the bitreverse gets expanded.
9722   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9723     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9724     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9725   }
9726 
9727   // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. the lower half is known zero)
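  // e.g. (illustrative, i64 with c == 48):
  //   (bswap (shl x, 48)) -> (zext (bswap (trunc (shl x, 16) to i32)))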
9729   unsigned BW = VT.getScalarSizeInBits();
9730   if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
9731     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9732     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
9733     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
9734         ShAmt->getZExtValue() >= (BW / 2) &&
9735         (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
9736         TLI.isTruncateFree(VT, HalfVT) &&
9737         (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
9738       SDValue Res = N0.getOperand(0);
9739       if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
9740         Res = DAG.getNode(ISD::SHL, DL, VT, Res,
9741                           DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
9742       Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
9743       Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
9744       return DAG.getZExtOrTrunc(Res, DL, VT);
9745     }
9746   }
9747 
9748   return SDValue();
9749 }
9750 
9751 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9752   SDValue N0 = N->getOperand(0);
9753   EVT VT = N->getValueType(0);
9754 
9755   // fold (bitreverse c1) -> c2
9756   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9757     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9758   // fold (bitreverse (bitreverse x)) -> x
9759   if (N0.getOpcode() == ISD::BITREVERSE)
9760     return N0.getOperand(0);
9761   return SDValue();
9762 }
9763 
9764 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9765   SDValue N0 = N->getOperand(0);
9766   EVT VT = N->getValueType(0);
9767 
9768   // fold (ctlz c1) -> c2
9769   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9770     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9771 
9772   // If the value is known never to be zero, switch to the undef version.
9773   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9774     if (DAG.isKnownNeverZero(N0))
9775       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9776   }
9777 
9778   return SDValue();
9779 }
9780 
9781 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9782   SDValue N0 = N->getOperand(0);
9783   EVT VT = N->getValueType(0);
9784 
9785   // fold (ctlz_zero_undef c1) -> c2
9786   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9787     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9788   return SDValue();
9789 }
9790 
9791 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9792   SDValue N0 = N->getOperand(0);
9793   EVT VT = N->getValueType(0);
9794 
9795   // fold (cttz c1) -> c2
9796   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9797     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9798 
9799   // If the value is known never to be zero, switch to the undef version.
9800   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9801     if (DAG.isKnownNeverZero(N0))
9802       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9803   }
9804 
9805   return SDValue();
9806 }
9807 
9808 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9809   SDValue N0 = N->getOperand(0);
9810   EVT VT = N->getValueType(0);
9811 
9812   // fold (cttz_zero_undef c1) -> c2
9813   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9814     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9815   return SDValue();
9816 }
9817 
9818 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9819   SDValue N0 = N->getOperand(0);
9820   EVT VT = N->getValueType(0);
9821 
9822   // fold (ctpop c1) -> c2
9823   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9824     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9825   return SDValue();
9826 }
9827 
9828 // FIXME: This should be checking for no signed zeros on individual operands, as
9829 // well as no nans.
9830 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9831                                          SDValue RHS,
9832                                          const TargetLowering &TLI) {
9833   const TargetOptions &Options = DAG.getTarget().Options;
9834   EVT VT = LHS.getValueType();
9835 
9836   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9837          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9838          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9839 }
9840 
/// Generate a Min/Max node.
9842 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9843                                    SDValue RHS, SDValue True, SDValue False,
9844                                    ISD::CondCode CC, const TargetLowering &TLI,
9845                                    SelectionDAG &DAG) {
9846   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9847     return SDValue();
9848 
9849   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9850   switch (CC) {
9851   case ISD::SETOLT:
9852   case ISD::SETOLE:
9853   case ISD::SETLT:
9854   case ISD::SETLE:
9855   case ISD::SETULT:
9856   case ISD::SETULE: {
    // Since it is already known never-NaN to get here, either fminnum or
    // fminnum_ieee is OK. Try the IEEE version first, since fminnum is
    // expanded in terms of it.
9860     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9861     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9862       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9863 
9864     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9865     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9866       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9867     return SDValue();
9868   }
9869   case ISD::SETOGT:
9870   case ISD::SETOGE:
9871   case ISD::SETGT:
9872   case ISD::SETGE:
9873   case ISD::SETUGT:
9874   case ISD::SETUGE: {
9875     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9876     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9877       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9878 
9879     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9880     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9881       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9882     return SDValue();
9883   }
9884   default:
9885     return SDValue();
9886   }
9887 }
9888 
9889 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9890 /// the condition operand sign-bit across the value width and use it as a mask.
9891 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9892   SDValue Cond = N->getOperand(0);
9893   SDValue C1 = N->getOperand(1);
9894   SDValue C2 = N->getOperand(2);
9895   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
9896     return SDValue();
9897 
9898   EVT VT = N->getValueType(0);
9899   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9900       VT != Cond.getOperand(0).getValueType())
9901     return SDValue();
9902 
9903   // The inverted-condition + commuted-select variants of these patterns are
9904   // canonicalized to these forms in IR.
9905   SDValue X = Cond.getOperand(0);
9906   SDValue CondC = Cond.getOperand(1);
9907   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9908   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9909       isAllOnesOrAllOnesSplat(C2)) {
9910     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9911     SDLoc DL(N);
9912     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9913     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9914     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9915   }
9916   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9917     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9918     SDLoc DL(N);
9919     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9920     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9921     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9922   }
9923   return SDValue();
9924 }
9925 
9926 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9927   SDValue Cond = N->getOperand(0);
9928   SDValue N1 = N->getOperand(1);
9929   SDValue N2 = N->getOperand(2);
9930   EVT VT = N->getValueType(0);
9931   EVT CondVT = Cond.getValueType();
9932   SDLoc DL(N);
9933 
9934   if (!VT.isInteger())
9935     return SDValue();
9936 
9937   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9938   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9939   if (!C1 || !C2)
9940     return SDValue();
9941 
9942   // Only do this before legalization to avoid conflicting with target-specific
9943   // transforms in the other direction (create a select from a zext/sext). There
9944   // is also a target-independent combine here in DAGCombiner in the other
9945   // direction for (select Cond, -1, 0) when the condition is not i1.
9946   if (CondVT == MVT::i1 && !LegalOperations) {
9947     if (C1->isZero() && C2->isOne()) {
9948       // select Cond, 0, 1 --> zext (!Cond)
9949       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9950       if (VT != MVT::i1)
9951         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9952       return NotCond;
9953     }
9954     if (C1->isZero() && C2->isAllOnes()) {
9955       // select Cond, 0, -1 --> sext (!Cond)
9956       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9957       if (VT != MVT::i1)
9958         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9959       return NotCond;
9960     }
9961     if (C1->isOne() && C2->isZero()) {
9962       // select Cond, 1, 0 --> zext (Cond)
9963       if (VT != MVT::i1)
9964         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9965       return Cond;
9966     }
9967     if (C1->isAllOnes() && C2->isZero()) {
9968       // select Cond, -1, 0 --> sext (Cond)
9969       if (VT != MVT::i1)
9970         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9971       return Cond;
9972     }
9973 
9974     // Use a target hook because some targets may prefer to transform in the
9975     // other direction.
9976     if (TLI.convertSelectOfConstantsToMath(VT)) {
9977       // For any constants that differ by 1, we can transform the select into an
9978       // extend and add.
9979       const APInt &C1Val = C1->getAPIntValue();
9980       const APInt &C2Val = C2->getAPIntValue();
9981       if (C1Val - 1 == C2Val) {
9982         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9983         if (VT != MVT::i1)
9984           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9985         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9986       }
9987       if (C1Val + 1 == C2Val) {
9988         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9989         if (VT != MVT::i1)
9990           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9991         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9992       }
9993 
9994       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
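      // e.g. (illustrative): select Cond, 8, 0 --> shl (zext Cond), 3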
9995       if (C1Val.isPowerOf2() && C2Val.isZero()) {
9996         if (VT != MVT::i1)
9997           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9998         SDValue ShAmtC =
9999             DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
10000         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
10001       }
10002 
10003       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10004         return V;
10005     }
10006 
10007     return SDValue();
10008   }
10009 
10010   // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer-based booleans have different contents
  // from floating-point-based booleans. This is because we can't tell whether
  // we have an integer-based boolean or a floating-point-based boolean unless
  // we can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if Cond is the SETCC node, but it can potentially be
10016   // undiscoverable (or not reasonably discoverable). For example, it could be
10017   // in another basic block or it could require searching a complicated
10018   // expression.
10019   if (CondVT.isInteger() &&
10020       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
10021           TargetLowering::ZeroOrOneBooleanContent &&
10022       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
10023           TargetLowering::ZeroOrOneBooleanContent &&
10024       C1->isZero() && C2->isOne()) {
10025     SDValue NotCond =
10026         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
10027     if (VT.bitsEq(CondVT))
10028       return NotCond;
10029     return DAG.getZExtOrTrunc(NotCond, DL, VT);
10030   }
10031 
10032   return SDValue();
10033 }
10034 
10035 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
10036   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
10037          "Expected a (v)select");
10038   SDValue Cond = N->getOperand(0);
10039   SDValue T = N->getOperand(1), F = N->getOperand(2);
10040   EVT VT = N->getValueType(0);
10041   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
10042     return SDValue();
10043 
10044   // select Cond, Cond, F --> or Cond, F
10045   // select Cond, 1, F    --> or Cond, F
10046   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
10047     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
10048 
10049   // select Cond, T, Cond --> and Cond, T
10050   // select Cond, T, 0    --> and Cond, T
10051   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
10052     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
10053 
10054   // select Cond, T, 1 --> or (not Cond), T
10055   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
10056     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10057     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
10058   }
10059 
10060   // select Cond, 0, F --> and (not Cond), F
10061   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
10062     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10063     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
10064   }
10065 
10066   return SDValue();
10067 }
10068 
10069 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
10070   SDValue N0 = N->getOperand(0);
10071   SDValue N1 = N->getOperand(1);
10072   SDValue N2 = N->getOperand(2);
10073   EVT VT = N->getValueType(0);
10074   if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
10075     return SDValue();
10076 
10077   SDValue Cond0 = N0.getOperand(0);
10078   SDValue Cond1 = N0.getOperand(1);
10079   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10080   if (VT != Cond0.getValueType())
10081     return SDValue();
10082 
10083   // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
10084   // compare is inverted from that pattern ("Cond0 s> -1").
10085   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
10086     ; // This is the pattern we are looking for.
10087   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
10088     std::swap(N1, N2);
10089   else
10090     return SDValue();
10091 
10092   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
10093   if (isNullOrNullSplat(N2)) {
10094     SDLoc DL(N);
10095     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10096     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10097     return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
10098   }
10099 
10100   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
10101   if (isAllOnesOrAllOnesSplat(N1)) {
10102     SDLoc DL(N);
10103     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10104     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10105     return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
10106   }
10107 
10108   // If we have to invert the sign bit mask, only do that transform if the
10109   // target has a bitwise 'and not' instruction (the invert is free).
  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
10111   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10112   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
10113     SDLoc DL(N);
10114     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10115     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10116     SDValue Not = DAG.getNOT(DL, Sra, VT);
10117     return DAG.getNode(ISD::AND, DL, VT, Not, N2);
10118   }
10119 
10120   // TODO: There's another pattern in this family, but it may require
10121   //       implementing hasOrNot() to check for profitability:
10122   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
10123 
10124   return SDValue();
10125 }
10126 
10127 SDValue DAGCombiner::visitSELECT(SDNode *N) {
10128   SDValue N0 = N->getOperand(0);
10129   SDValue N1 = N->getOperand(1);
10130   SDValue N2 = N->getOperand(2);
10131   EVT VT = N->getValueType(0);
10132   EVT VT0 = N0.getValueType();
10133   SDLoc DL(N);
10134   SDNodeFlags Flags = N->getFlags();
10135 
10136   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10137     return V;
10138 
10139   if (SDValue V = foldSelectOfConstants(N))
10140     return V;
10141 
10142   if (SDValue V = foldBoolSelectToLogic(N, DAG))
10143     return V;
10144 
10145   // If we can fold this based on the true/false value, do so.
10146   if (SimplifySelectOps(N, N1, N2))
10147     return SDValue(N, 0); // Don't revisit N.
10148 
10149   if (VT0 == MVT::i1) {
10150     // The code in this block deals with the following 2 equivalences:
10151     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
10152     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
10153     // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if the inner select already exists in the DAG,
    // and we always transform to the left-hand form if we know that we can
    // further optimize the combination of the conditions.
10158     bool normalizeToSequence =
10159         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
10160     // select (and Cond0, Cond1), X, Y
10161     //   -> select Cond0, (select Cond1, X, Y), Y
10162     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
10163       SDValue Cond0 = N0->getOperand(0);
10164       SDValue Cond1 = N0->getOperand(1);
10165       SDValue InnerSelect =
10166           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
10167       if (normalizeToSequence || !InnerSelect.use_empty())
10168         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
10169                            InnerSelect, N2, Flags);
10170       // Cleanup on failure.
10171       if (InnerSelect.use_empty())
10172         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10173     }
10174     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
10175     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
10176       SDValue Cond0 = N0->getOperand(0);
10177       SDValue Cond1 = N0->getOperand(1);
10178       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
10179                                         Cond1, N1, N2, Flags);
10180       if (normalizeToSequence || !InnerSelect.use_empty())
10181         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
10182                            InnerSelect, Flags);
10183       // Cleanup on failure.
10184       if (InnerSelect.use_empty())
10185         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10186     }
10187 
10188     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
10189     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
10190       SDValue N1_0 = N1->getOperand(0);
10191       SDValue N1_1 = N1->getOperand(1);
10192       SDValue N1_2 = N1->getOperand(2);
10193       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
10194         // Create the actual and node if we can generate good code for it.
10195         if (!normalizeToSequence) {
10196           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
10197           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
10198                              N2, Flags);
10199         }
10200         // Otherwise see if we can optimize the "and" to a better pattern.
10201         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
10202           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
10203                              N2, Flags);
10204         }
10205       }
10206     }
10207     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
10208     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
10209       SDValue N2_0 = N2->getOperand(0);
10210       SDValue N2_1 = N2->getOperand(1);
10211       SDValue N2_2 = N2->getOperand(2);
10212       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
10213         // Create the actual or node if we can generate good code for it.
10214         if (!normalizeToSequence) {
10215           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
10216           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
10217                              N2_2, Flags);
10218         }
10219         // Otherwise see if we can optimize to a better pattern.
10220         if (SDValue Combined = visitORLike(N0, N2_0, N))
10221           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
10222                              N2_2, Flags);
10223       }
10224     }
10225   }
10226 
10227   // select (not Cond), N1, N2 -> select Cond, N2, N1
10228   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10229     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10230     SelectOp->setFlags(Flags);
10231     return SelectOp;
10232   }
10233 
10234   // Fold selects based on a setcc into other things, such as min/max/abs.
10235   if (N0.getOpcode() == ISD::SETCC) {
10236     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
10237     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10238 
10239     // select (fcmp lt x, y), x, y -> fminnum x, y
10240     // select (fcmp gt x, y), x, y -> fmaxnum x, y
10241     //
10242     // This is OK if we don't care what happens if either operand is a NaN.
10243     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10244       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
10245                                                 CC, TLI, DAG))
10246         return FMinMax;
10247 
10248     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
10249     // This is conservatively limited to pre-legal-operations to give targets
10250     // a chance to reverse the transform if they want to do that. Also, it is
10251     // unlikely that the pattern would be formed late, so it's probably not
10252     // worth going through the other checks.
10253     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
10254         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
10255         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10256       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10257       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10258       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10259         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10260         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10261         //
10262         // The IR equivalent of this transform would have this form:
10263         //   %a = add %x, C
10264         //   %c = icmp ugt %x, ~C
10265         //   %r = select %c, -1, %a
10266         //   =>
10267         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10268         //   %u0 = extractvalue %u, 0
10269         //   %u1 = extractvalue %u, 1
10270         //   %r = select %u1, -1, %u0
10271         SDVTList VTs = DAG.getVTList(VT, VT0);
10272         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10273         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10274       }
10275     }
10276 
10277     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10278         (!LegalOperations &&
10279          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
10280       // Any flags available in a select/setcc fold will be on the setcc as they
10281       // migrated from fcmp
10282       Flags = N0->getFlags();
10283       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10284                                        N2, N0.getOperand(2));
10285       SelectNode->setFlags(Flags);
10286       return SelectNode;
10287     }
10288 
10289     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10290       return NewSel;
10291   }
10292 
10293   if (!VT.isVector())
10294     if (SDValue BinOp = foldSelectOfBinops(N))
10295       return BinOp;
10296 
10297   return SDValue();
10298 }
10299 
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
10302 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
10303   SDLoc DL(N);
10304   SDValue Cond = N->getOperand(0);
10305   SDValue LHS = N->getOperand(1);
10306   SDValue RHS = N->getOperand(2);
10307   EVT VT = N->getValueType(0);
10308   int NumElems = VT.getVectorNumElements();
10309   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10310          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10311          Cond.getOpcode() == ISD::BUILD_VECTOR);
10312 
  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
  // about binary ones here.
10315   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10316     return SDValue();
10317 
10318   // We're sure we have an even number of elements due to the
10319   // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF. After we find the
  // first non-UNDEF element, keep looping until we get to half the length of
  // the BV and check that all the non-undef elements are the same.
10323   ConstantSDNode *BottomHalf = nullptr;
10324   for (int i = 0; i < NumElems / 2; ++i) {
10325     if (Cond->getOperand(i)->isUndef())
10326       continue;
10327 
10328     if (BottomHalf == nullptr)
10329       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10330     else if (Cond->getOperand(i).getNode() != BottomHalf)
10331       return SDValue();
10332   }
10333 
10334   // Do the same for the second half of the BuildVector
10335   ConstantSDNode *TopHalf = nullptr;
10336   for (int i = NumElems / 2; i < NumElems; ++i) {
10337     if (Cond->getOperand(i)->isUndef())
10338       continue;
10339 
10340     if (TopHalf == nullptr)
10341       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10342     else if (Cond->getOperand(i).getNode() != TopHalf)
10343       return SDValue();
10344   }
10345 
10346   assert(TopHalf && BottomHalf &&
10347          "One half of the selector was all UNDEFs and the other was all the "
10348          "same value. This should have been addressed before this function.");
10349   return DAG.getNode(
10350       ISD::CONCAT_VECTORS, DL, VT,
10351       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10352       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10353 }
10354 
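// Recognize a gather/scatter index of the form add(splat(Base), Step) paired
// with a null base pointer, and hoist the splatted value out as the scalar
// base, e.g. (illustrative):
//   BasePtr == 0, Index == add(splat(p), step) --> BasePtr = p, Index = step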
10355 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
10356   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10357     return false;
10358 
10359   // For now we check only the LHS of the add.
10360   SDValue LHS = Index.getOperand(0);
10361   SDValue SplatVal = DAG.getSplatValue(LHS);
10362   if (!SplatVal)
10363     return false;
10364 
10365   BasePtr = SplatVal;
10366   Index = Index.getOperand(1);
10367   return true;
10368 }
10369 
10370 // Fold sext/zext of index into index type.
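// e.g. (illustrative): a gather whose index is (sext v4i32 to v4i64) can use
// the narrower v4i32 index directly, with the index type marked signed, if the
// target reports the extend as removable.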
10371 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
10372                      bool Scaled, SelectionDAG &DAG) {
10373   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10374 
10375   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10376     SDValue Op = Index.getOperand(0);
10377     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
10378     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10379       Index = Op;
10380       return true;
10381     }
10382   }
10383 
10384   if (Index.getOpcode() == ISD::SIGN_EXTEND) {
10385     SDValue Op = Index.getOperand(0);
10386     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
10387     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
10388       Index = Op;
10389       return true;
10390     }
10391   }
10392 
10393   return false;
10394 }
10395 
10396 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10397   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10398   SDValue Mask = MSC->getMask();
10399   SDValue Chain = MSC->getChain();
10400   SDValue Index = MSC->getIndex();
10401   SDValue Scale = MSC->getScale();
10402   SDValue StoreVal = MSC->getValue();
10403   SDValue BasePtr = MSC->getBasePtr();
10404   SDLoc DL(N);
10405 
10406   // Zap scatters with a zero mask.
10407   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10408     return Chain;
10409 
10410   if (refineUniformBase(BasePtr, Index, DAG)) {
10411     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10412     return DAG.getMaskedScatter(
10413         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10414         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10415   }
10416 
10417   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
10418     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10419     return DAG.getMaskedScatter(
10420         DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops,
10421         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
10422   }
10423 
10424   return SDValue();
10425 }
10426 
10427 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10428   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10429   SDValue Mask = MST->getMask();
10430   SDValue Chain = MST->getChain();
10431   SDValue Value = MST->getValue();
10432   SDValue Ptr = MST->getBasePtr();
10433   SDLoc DL(N);
10434 
10435   // Zap masked stores with a zero mask.
10436   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10437     return Chain;
10438 
  // If this is a masked store with an all ones mask, we can use an unmasked
  // store.
10440   // FIXME: Can we do this for indexed, compressing, or truncating stores?
10441   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10442       !MST->isCompressingStore() && !MST->isTruncatingStore())
10443     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10444                         MST->getBasePtr(), MST->getPointerInfo(),
10445                         MST->getOriginalAlign(), MachineMemOperand::MOStore,
10446                         MST->getAAInfo());
10447 
10448   // Try transforming N to an indexed store.
10449   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10450     return SDValue(N, 0);
10451 
10452   if (MST->isTruncatingStore() && MST->isUnindexed() &&
10453       Value.getValueType().isInteger() &&
10454       (!isa<ConstantSDNode>(Value) ||
10455        !cast<ConstantSDNode>(Value)->isOpaque())) {
10456     APInt TruncDemandedBits =
10457         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10458                              MST->getMemoryVT().getScalarSizeInBits());
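    // e.g. (illustrative): for an i32 value truncating-stored as i16, only the
    // low 16 bits of Value are demanded.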
10459 
10460     // See if we can simplify the operation with
10461     // SimplifyDemandedBits, which only works if the value has a single use.
10462     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the Store node itself.
10467       if (N->getOpcode() != ISD::DELETED_NODE)
10468         AddToWorklist(N);
10469       return SDValue(N, 0);
10470     }
10471   }
10472 
10473   // If this is a TRUNC followed by a masked store, fold this into a masked
10474   // truncating store.  We can do this even if this is already a masked
10475   // truncstore.
10476   if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
10477       MST->isUnindexed() &&
10478       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10479                                MST->getMemoryVT(), LegalOperations)) {
10480     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10481                                          Value.getOperand(0).getValueType());
10482     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10483                               MST->getOffset(), Mask, MST->getMemoryVT(),
10484                               MST->getMemOperand(), MST->getAddressingMode(),
10485                               /*IsTruncating=*/true);
10486   }
10487 
10488   return SDValue();
10489 }
10490 
10491 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10492   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10493   SDValue Mask = MGT->getMask();
10494   SDValue Chain = MGT->getChain();
10495   SDValue Index = MGT->getIndex();
10496   SDValue Scale = MGT->getScale();
10497   SDValue PassThru = MGT->getPassThru();
10498   SDValue BasePtr = MGT->getBasePtr();
10499   SDLoc DL(N);
10500 
10501   // Zap gathers with a zero mask.
10502   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10503     return CombineTo(N, PassThru, MGT->getChain());
10504 
10505   if (refineUniformBase(BasePtr, Index, DAG)) {
10506     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10507     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10508                                MGT->getMemoryVT(), DL, Ops,
10509                                MGT->getMemOperand(), MGT->getIndexType(),
10510                                MGT->getExtensionType());
10511   }
10512 
10513   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
10514     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10515     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
10516                                MGT->getMemoryVT(), DL, Ops,
10517                                MGT->getMemOperand(), MGT->getIndexType(),
10518                                MGT->getExtensionType());
10519   }
10520 
10521   return SDValue();
10522 }
10523 
10524 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10525   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10526   SDValue Mask = MLD->getMask();
10527   SDLoc DL(N);
10528 
10529   // Zap masked loads with a zero mask.
10530   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10531     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10532 
  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
10534   // FIXME: Can we do this for indexed, expanding, or extending loads?
10535   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10536       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10537     SDValue NewLd = DAG.getLoad(
10538         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10539         MLD->getPointerInfo(), MLD->getOriginalAlign(),
10540         MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10541     return CombineTo(N, NewLd, NewLd.getValue(1));
10542   }
10543 
10544   // Try transforming N to an indexed load.
10545   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10546     return SDValue(N, 0);
10547 
10548   return SDValue();
10549 }
10550 
10551 /// A vector select of 2 constant vectors can be simplified to math/logic to
10552 /// avoid a variable select instruction and possibly avoid constant loads.
10553 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10554   SDValue Cond = N->getOperand(0);
10555   SDValue N1 = N->getOperand(1);
10556   SDValue N2 = N->getOperand(2);
10557   EVT VT = N->getValueType(0);
10558   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10559       !TLI.convertSelectOfConstantsToMath(VT) ||
10560       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10561       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10562     return SDValue();
10563 
10564   // Check if we can use the condition value to increment/decrement a single
10565   // constant value. This simplifies a select to an add and removes a constant
10566   // load/materialization from the general case.
10567   bool AllAddOne = true;
10568   bool AllSubOne = true;
10569   unsigned Elts = VT.getVectorNumElements();
10570   for (unsigned i = 0; i != Elts; ++i) {
10571     SDValue N1Elt = N1.getOperand(i);
10572     SDValue N2Elt = N2.getOperand(i);
10573     if (N1Elt.isUndef() || N2Elt.isUndef())
10574       continue;
10575     if (N1Elt.getValueType() != N2Elt.getValueType())
10576       continue;
10577 
10578     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10579     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10580     if (C1 != C2 + 1)
10581       AllAddOne = false;
10582     if (C1 != C2 - 1)
10583       AllSubOne = false;
10584   }
10585 
10586   // Further simplifications for the extra-special cases where the constants are
10587   // all 0 or all -1 should be implemented as folds of these patterns.
10588   SDLoc DL(N);
10589   if (AllAddOne || AllSubOne) {
10590     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10591     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
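    // e.g. vselect Cond, <5,5,5,5>, <4,4,4,4> --> add (zext Cond), <4,4,4,4>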
10592     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10593     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10594     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10595   }
10596 
10597   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
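  // e.g. vselect Cond, <8,8,8,8>, <0,0,0,0> --> shl (zext Cond), 3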
10598   APInt Pow2C;
10599   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10600       isNullOrNullSplat(N2)) {
10601     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10602     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10603     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10604   }
10605 
10606   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10607     return V;
10608 
10609   // The general case for select-of-constants:
10610   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10611   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10612   // leave that to a machine-specific pass.
10613   return SDValue();
10614 }
10615 
10616 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10617   SDValue N0 = N->getOperand(0);
10618   SDValue N1 = N->getOperand(1);
10619   SDValue N2 = N->getOperand(2);
10620   EVT VT = N->getValueType(0);
10621   SDLoc DL(N);
10622 
10623   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10624     return V;
10625 
10626   if (SDValue V = foldBoolSelectToLogic(N, DAG))
10627     return V;
10628 
10629   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10630   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10631     return DAG.getSelect(DL, VT, F, N2, N1);
10632 
10633   // Canonicalize integer abs.
10634   // vselect (setg[te] X,  0),  X, -X ->
10635   // vselect (setgt    X, -1),  X, -X ->
10636   // vselect (setl[te] X,  0), -X,  X ->
10637   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10638   if (N0.getOpcode() == ISD::SETCC) {
10639     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10640     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10641     bool isAbs = false;
10642     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10643 
10644     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10645          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10646         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10647       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10648     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10649              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10650       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10651 
10652     if (isAbs) {
10653       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10654         return DAG.getNode(ISD::ABS, DL, VT, LHS);
10655 
10656       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10657                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
10658                                                   DL, getShiftAmountTy(VT)));
10659       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10660       AddToWorklist(Shift.getNode());
10661       AddToWorklist(Add.getNode());
10662       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10663     }
10664 
    // fold (vselect (fcmp lt x, y), x, y) -> fminnum x, y
    // fold (vselect (fcmp gt x, y), x, y) -> fmaxnum x, y
10667     //
10668     // This is OK if we don't care about what happens if either operand is a
10669     // NaN.
10670     //
10671     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10672       if (SDValue FMinMax =
10673               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10674         return FMinMax;
10675     }
10676 
10677     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10678       return S;
10679     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10680       return S;
10681 
10682     // If this select has a condition (setcc) with narrower operands than the
10683     // select, try to widen the compare to match the select width.
10684     // TODO: This should be extended to handle any constant.
10685     // TODO: This could be extended to handle non-loading patterns, but that
10686     //       requires thorough testing to avoid regressions.
10687     if (isNullOrNullSplat(RHS)) {
10688       EVT NarrowVT = LHS.getValueType();
10689       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10690       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10691       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10692       unsigned WideWidth = WideVT.getScalarSizeInBits();
10693       bool IsSigned = isSignedIntSetCC(CC);
10694       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10695       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10696           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10697           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10698           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10699         // Both compare operands can be widened for free. The LHS can use an
10700         // extended load, and the RHS is a constant:
10701         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10702         //   vselect (setcc extload(X), C'), N1, N2
10703         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10704         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10705         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10706         EVT WideSetCCVT = getSetCCResultType(WideVT);
10707         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10708         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10709       }
10710     }
10711 
10712     // Match VSELECTs into add with unsigned saturation.
10713     if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits
      // set. If it's on the left side, invert the predicate to simplify the
      // logic below.
10716       SDValue Other;
10717       ISD::CondCode SatCC = CC;
10718       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10719         Other = N2;
10720         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10721       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10722         Other = N1;
10723       }
10724 
10725       if (Other && Other.getOpcode() == ISD::ADD) {
10726         SDValue CondLHS = LHS, CondRHS = RHS;
10727         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10728 
10729         // Canonicalize condition operands.
10730         if (SatCC == ISD::SETUGE) {
10731           std::swap(CondLHS, CondRHS);
10732           SatCC = ISD::SETULE;
10733         }
10734 
10735         // We can test against either of the addition operands.
10736         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10737         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10738         if (SatCC == ISD::SETULE && Other == CondRHS &&
10739             (OpLHS == CondLHS || OpRHS == CondLHS))
10740           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10741 
10742         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10743             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10744              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10745             CondLHS == OpLHS) {
          // If the RHS is a constant we have to reverse the const
          // canonicalization.
          // x <= ~C ? x+C : ~0 --> uaddsat x, C
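          // e.g. for i8: x <= 0xFE ? x+1 : ~0 --> uaddsat x, 1 (~1 == 0xFE).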
10749           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10750             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10751           };
10752           if (SatCC == ISD::SETULE &&
10753               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10754             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10755         }
10756       }
10757     }
10758 
10759     // Match VSELECTs into sub with unsigned saturation.
10760     if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side, invert the predicate to simplify the logic below.
10763       SDValue Other;
10764       ISD::CondCode SatCC = CC;
10765       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10766         Other = N2;
10767         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10768       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10769         Other = N1;
10770       }
10771 
10772       if (Other && Other.getNumOperands() == 2) {
10773         SDValue CondRHS = RHS;
10774         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10775 
10776         if (Other.getOpcode() == ISD::SUB &&
10777             LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
10778             OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
10779           // Look for a general sub with unsigned saturation first.
10780           // zext(x) >= y ? x - trunc(y) : 0
10781           // --> usubsat(x,trunc(umin(y,SatLimit)))
10782           // zext(x) >  y ? x - trunc(y) : 0
10783           // --> usubsat(x,trunc(umin(y,SatLimit)))
10784           if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
10785             return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
10786                                        DL);
10787         }
10788 
10789         if (OpLHS == LHS) {
10790           // Look for a general sub with unsigned saturation first.
10791           // x >= y ? x-y : 0 --> usubsat x, y
10792           // x >  y ? x-y : 0 --> usubsat x, y
10793           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10794               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10795             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10796 
10797           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10798               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10799             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10800                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10801               // If the RHS is a constant we have to reverse the const
10802               // canonicalization.
              // x > C-1 ? x + (-C) : 0 --> usubsat x, C
10804               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10805                 return (!Op && !Cond) ||
10806                        (Op && Cond &&
10807                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10808               };
10809               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10810                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10811                                             /*AllowUndefs*/ true)) {
10812                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10813                                     DAG.getConstant(0, DL, VT), OpRHS);
10814                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10815               }
10816 
10817               // Another special case: If C was a sign bit, the sub has been
10818               // canonicalized into a xor.
10819               // FIXME: Would it be better to use computeKnownBits to determine
10820               //        whether it's safe to decanonicalize the xor?
10821               // x s< 0 ? x^C : 0 --> usubsat x, C
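              // e.g. for i8 with C = 0x80: when x is negative, x ^ 0x80 is
              // x - 0x80, and when x is non-negative, x < 0x80, so
              // usubsat x, 0x80 yields 0, matching the zero arm.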
10822               APInt SplatValue;
10823               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10824                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10825                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10826                   SplatValue.isSignMask()) {
10827                 // Note that we have to rebuild the RHS constant here to
10828                 // ensure we don't rely on particular values of undef lanes.
10829                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10830                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10831               }
10832             }
10833           }
10834         }
10835       }
10836     }
10837   }
10838 
10839   if (SimplifySelectOps(N, N1, N2))
10840     return SDValue(N, 0);  // Don't revisit N.
10841 
10842   // Fold (vselect all_ones, N1, N2) -> N1
10843   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
10844     return N1;
10845   // Fold (vselect all_zeros, N1, N2) -> N2
10846   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
10847     return N2;
10848 
  // ConvertSelectToConcatVector assumes that both of the above checks for
  // (vselect (build_vector all{ones,zeros}) ...) have already been made
  // and handled.
10852   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
10853       N2.getOpcode() == ISD::CONCAT_VECTORS &&
10854       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
10855     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
10856       return CV;
10857   }
10858 
10859   if (SDValue V = foldVSelectOfConstants(N))
10860     return V;
10861 
10862   if (hasOperation(ISD::SRA, VT))
10863     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
10864       return V;
10865 
10866   return SDValue();
10867 }
10868 
10869 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
10870   SDValue N0 = N->getOperand(0);
10871   SDValue N1 = N->getOperand(1);
10872   SDValue N2 = N->getOperand(2);
10873   SDValue N3 = N->getOperand(3);
10874   SDValue N4 = N->getOperand(4);
10875   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
10876 
10877   // fold select_cc lhs, rhs, x, x, cc -> x
10878   if (N2 == N3)
10879     return N2;
10880 
10881   // Determine if the condition we're dealing with is constant
10882   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
10883                                   CC, SDLoc(N), false)) {
10884     AddToWorklist(SCC.getNode());
10885 
10886     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
10887       if (!SCCC->isZero())
10888         return N2;    // cond always true -> true val
10889       else
10890         return N3;    // cond always false -> false val
10891     } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // consistent with DAG creation: no setcc node is created in this case.
10894       return N2;
10895     } else if (SCC.getOpcode() == ISD::SETCC) {
10896       // Fold to a simpler select_cc
10897       SDValue SelectOp = DAG.getNode(
10898           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
10899           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
10900       SelectOp->setFlags(SCC->getFlags());
10901       return SelectOp;
10902     }
10903   }
10904 
10905   // If we can fold this based on the true/false value, do so.
10906   if (SimplifySelectOps(N, N2, N3))
10907     return SDValue(N, 0);  // Don't revisit N.
10908 
10909   // fold select_cc into other things, such as min/max/abs
10910   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
10911 }
10912 
10913 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
10917   bool PreferSetCC =
10918       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
10919 
10920   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
10921   EVT VT = N->getValueType(0);
10922 
10923   //   SETCC(FREEZE(X), CONST, Cond)
10924   // =>
10925   //   FREEZE(SETCC(X, CONST, Cond))
10926   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
10927   // isn't equivalent to true or false.
10928   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
10929   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
10930   //
10931   // This transformation is beneficial because visitBRCOND can fold
10932   // BRCOND(FREEZE(X)) to BRCOND(X).
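  // An illustrative chain (assuming both folds apply):
  //   brcond (setcc (freeze X), 0, setne)
  //     --> brcond (freeze (setcc X, 0, setne))
  //     --> brcond (setcc X, 0, setne)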
10933 
10934   // Conservatively optimize integer comparisons only.
10935   if (PreferSetCC) {
10936     // Do this only when SETCC is going to be used by BRCOND.
10937 
10938     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
10939     ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
10940     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
10941     bool Updated = false;
10942 
10943     // Is 'X Cond C' always true or false?
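    // e.g. 'setult X, 0' is always false and 'setuge X, 0' is always true.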
10944     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
10945       bool False = (Cond == ISD::SETULT && C->isZero()) ||
10946                    (Cond == ISD::SETLT  && C->isMinSignedValue()) ||
10947                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
10948                    (Cond == ISD::SETGT  && C->isMaxSignedValue());
10949       bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||
10950                    (Cond == ISD::SETLE  && C->isMaxSignedValue()) ||
10951                    (Cond == ISD::SETUGE && C->isZero()) ||
10952                    (Cond == ISD::SETGE  && C->isMinSignedValue());
10953       return True || False;
10954     };
10955 
10956     if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
10957       if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
10958         N0 = N0->getOperand(0);
10959         Updated = true;
10960       }
10961     }
10962     if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
10963       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
10964                                N0C)) {
10965         N1 = N1->getOperand(0);
10966         Updated = true;
10967       }
10968     }
10969 
10970     if (Updated)
10971       return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
10972   }
10973 
10974   SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
10975                                    SDLoc(N), !PreferSetCC);
10976 
10977   if (!Combined)
10978     return SDValue();
10979 
10980   // If we prefer to have a setcc, and we don't, we'll try our best to
10981   // recreate one using rebuildSetCC.
10982   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
10983     SDValue NewSetCC = rebuildSetCC(Combined);
10984 
10985     // We don't have anything interesting to combine to.
10986     if (NewSetCC.getNode() == N)
10987       return SDValue();
10988 
10989     if (NewSetCC)
10990       return NewSetCC;
10991   }
10992 
10993   return Combined;
10994 }
10995 
10996 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
10997   SDValue LHS = N->getOperand(0);
10998   SDValue RHS = N->getOperand(1);
10999   SDValue Carry = N->getOperand(2);
11000   SDValue Cond = N->getOperand(3);
11001 
11002   // If Carry is false, fold to a regular SETCC.
11003   if (isNullConstant(Carry))
11004     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
11005 
11006   return SDValue();
11007 }
11008 
/// Check if N satisfies:
///   N is used once.
///   N is a Load.
///   The load is compatible with ExtOpcode. That means:
///     If the load has an explicit zero/sign extension, ExtOpcode must have
///     the same extension.
///     Otherwise any ExtOpcode is compatible.
11016 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
11017   if (!N.hasOneUse())
11018     return false;
11019 
11020   if (!isa<LoadSDNode>(N))
11021     return false;
11022 
11023   LoadSDNode *Load = cast<LoadSDNode>(N);
11024   ISD::LoadExtType LoadExt = Load->getExtensionType();
11025   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
11026     return true;
11027 
11028   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
11029   // extension.
11030   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
11031       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
11032     return false;
11033 
11034   return true;
11035 }
11036 
11037 /// Fold
11038 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
11039 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
11040 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
11041 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11042 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11043 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
11044                                          SelectionDAG &DAG) {
11045   unsigned Opcode = N->getOpcode();
11046   SDValue N0 = N->getOperand(0);
11047   EVT VT = N->getValueType(0);
11048   SDLoc DL(N);
11049 
11050   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11051           Opcode == ISD::ANY_EXTEND) &&
11052          "Expected EXTEND dag node in input!");
11053 
11054   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
11055       !N0.hasOneUse())
11056     return SDValue();
11057 
11058   SDValue Op1 = N0->getOperand(1);
11059   SDValue Op2 = N0->getOperand(2);
11060   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
11061     return SDValue();
11062 
11063   auto ExtLoadOpcode = ISD::EXTLOAD;
11064   if (Opcode == ISD::SIGN_EXTEND)
11065     ExtLoadOpcode = ISD::SEXTLOAD;
11066   else if (Opcode == ISD::ZERO_EXTEND)
11067     ExtLoadOpcode = ISD::ZEXTLOAD;
11068 
11069   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
11070   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
11071   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
11072       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
11073     return SDValue();
11074 
11075   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
11076   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
11077   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
11078 }
11079 
11080 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
11081 /// a build_vector of constants.
11082 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11083 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11084 /// Vector extends are not folded if operations are legal; this is to
11085 /// avoid introducing illegal build_vector dag nodes.
11086 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
11087                                          SelectionDAG &DAG, bool LegalTypes) {
11088   unsigned Opcode = N->getOpcode();
11089   SDValue N0 = N->getOperand(0);
11090   EVT VT = N->getValueType(0);
11091   SDLoc DL(N);
11092 
11093   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11094          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
11095          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
11096          && "Expected EXTEND dag node in input!");
11097 
11098   // fold (sext c1) -> c1
11099   // fold (zext c1) -> c1
11100   // fold (aext c1) -> c1
11101   if (isa<ConstantSDNode>(N0))
11102     return DAG.getNode(Opcode, DL, VT, N0);
11103 
11104   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11105   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
11106   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11107   if (N0->getOpcode() == ISD::SELECT) {
11108     SDValue Op1 = N0->getOperand(1);
11109     SDValue Op2 = N0->getOperand(2);
11110     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
11111         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
11114       //
11115       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
11116       // t2: i64 = any_extend t1
11117       // -->
11118       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
11119       // -->
11120       // t4: i64 = sign_extend_inreg t3
11121       unsigned FoldOpc = Opcode;
11122       if (FoldOpc == ISD::ANY_EXTEND)
11123         FoldOpc = ISD::SIGN_EXTEND;
11124       return DAG.getSelect(DL, VT, N0->getOperand(0),
11125                            DAG.getNode(FoldOpc, DL, VT, Op1),
11126                            DAG.getNode(FoldOpc, DL, VT, Op2));
11127     }
11128   }
11129 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
11133   EVT SVT = VT.getScalarType();
11134   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
11135       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
11136     return SDValue();
11137 
11138   // We can fold this node into a build_vector.
11139   unsigned VTBits = SVT.getSizeInBits();
11140   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
11141   SmallVector<SDValue, 8> Elts;
11142   unsigned NumElts = VT.getVectorNumElements();
11143 
  // For zero-extensions, UNDEF elements are folded to zero, so the upper
  // bits are still guaranteed to be zero.
11146   bool IsZext =
11147       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
11148 
11149   for (unsigned i = 0; i != NumElts; ++i) {
11150     SDValue Op = N0.getOperand(i);
11151     if (Op.isUndef()) {
11152       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
11153       continue;
11154     }
11155 
11156     SDLoc DL(Op);
    // Get the constant value and, if needed, truncate it to the size of the
    // type. Nodes like build_vector might have constants wider than the
    // scalar type.
11159     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
11160     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
11161       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
11162     else
11163       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
11164   }
11165 
11166   return DAG.getBuildVector(VT, DL, Elts);
11167 }
11168 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
11173 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
11174                                     unsigned ExtOpc,
11175                                     SmallVectorImpl<SDNode *> &ExtendNodes,
11176                                     const TargetLowering &TLI) {
11177   bool HasCopyToRegUses = false;
11178   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
11179   for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
11180        ++UI) {
11181     SDNode *User = *UI;
11182     if (User == N)
11183       continue;
11184     if (UI.getUse().getResNo() != N0.getResNo())
11185       continue;
11186     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
11187     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
11188       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
11189       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
11190         // Sign bits will be lost after a zext.
11191         return false;
11192       bool Add = false;
11193       for (unsigned i = 0; i != 2; ++i) {
11194         SDValue UseOp = User->getOperand(i);
11195         if (UseOp == N0)
11196           continue;
11197         if (!isa<ConstantSDNode>(UseOp))
11198           return false;
11199         Add = true;
11200       }
11201       if (Add)
11202         ExtendNodes.push_back(User);
11203       continue;
11204     }
11205     // If truncates aren't free and there are users we can't
11206     // extend, it isn't worthwhile.
11207     if (!isTruncFree)
11208       return false;
11209     // Remember if this value is live-out.
11210     if (User->getOpcode() == ISD::CopyToReg)
11211       HasCopyToRegUses = true;
11212   }
11213 
11214   if (HasCopyToRegUses) {
11215     bool BothLiveOut = false;
11216     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
11217          UI != UE; ++UI) {
11218       SDUse &Use = UI.getUse();
11219       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
11220         BothLiveOut = true;
11221         break;
11222       }
11223     }
11224     if (BothLiveOut)
11225       // Both unextended and extended values are live out. There had better be
11226       // a good reason for the transformation.
      return !ExtendNodes.empty();
11228   }
11229   return true;
11230 }
11231 
11232 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
11233                                   SDValue OrigLoad, SDValue ExtLoad,
11234                                   ISD::NodeType ExtType) {
11235   // Extend SetCC uses if necessary.
11236   SDLoc DL(ExtLoad);
11237   for (SDNode *SetCC : SetCCs) {
11238     SmallVector<SDValue, 4> Ops;
11239 
11240     for (unsigned j = 0; j != 2; ++j) {
11241       SDValue SOp = SetCC->getOperand(j);
11242       if (SOp == OrigLoad)
11243         Ops.push_back(ExtLoad);
11244       else
11245         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
11246     }
11247 
11248     Ops.push_back(SetCC->getOperand(2));
11249     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
11250   }
11251 }
11252 
11253 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
11254 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11255   SDValue N0 = N->getOperand(0);
11256   EVT DstVT = N->getValueType(0);
11257   EVT SrcVT = N0.getValueType();
11258 
11259   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11260           N->getOpcode() == ISD::ZERO_EXTEND) &&
11261          "Unexpected node type (not an extend)!");
11262 
11263   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11264   // For example, on a target with legal v4i32, but illegal v8i32, turn:
11265   //   (v8i32 (sext (v8i16 (load x))))
11266   // into:
11267   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
11268   //                          (v4i32 (sextload (x + 16)))))
11269   // Where uses of the original load, i.e.:
11270   //   (v8i16 (load x))
11271   // are replaced with:
11272   //   (v8i16 (truncate
11273   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
11274   //                            (v4i32 (sextload (x + 16)))))))
11275   //
11276   // This combine is only applicable to illegal, but splittable, vectors.
11277   // All legal types, and illegal non-vector types, are handled elsewhere.
11278   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11279   //
11280   if (N0->getOpcode() != ISD::LOAD)
11281     return SDValue();
11282 
11283   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11284 
11285   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11286       !N0.hasOneUse() || !LN0->isSimple() ||
11287       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11288       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11289     return SDValue();
11290 
11291   SmallVector<SDNode *, 4> SetCCs;
11292   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11293     return SDValue();
11294 
11295   ISD::LoadExtType ExtType =
11296       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11297 
11298   // Try to split the vector types to get down to legal types.
11299   EVT SplitSrcVT = SrcVT;
11300   EVT SplitDstVT = DstVT;
11301   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11302          SplitSrcVT.getVectorNumElements() > 1) {
11303     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11304     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11305   }
11306 
11307   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11308     return SDValue();
11309 
11310   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11311 
11312   SDLoc DL(N);
11313   const unsigned NumSplits =
11314       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11315   const unsigned Stride = SplitSrcVT.getStoreSize();
11316   SmallVector<SDValue, 4> Loads;
11317   SmallVector<SDValue, 4> Chains;
11318 
11319   SDValue BasePtr = LN0->getBasePtr();
11320   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11321     const unsigned Offset = Idx * Stride;
11322     const Align Align = commonAlignment(LN0->getAlign(), Offset);
11323 
11324     SDValue SplitLoad = DAG.getExtLoad(
11325         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11326         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11327         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11328 
11329     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11330 
11331     Loads.push_back(SplitLoad.getValue(0));
11332     Chains.push_back(SplitLoad.getValue(1));
11333   }
11334 
11335   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11336   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11337 
11338   // Simplify TF.
11339   AddToWorklist(NewChain.getNode());
11340 
11341   CombineTo(N, NewValue);
11342 
11343   // Replace uses of the original load (before extension)
11344   // with a truncate of the concatenated sextloaded vectors.
11345   SDValue Trunc =
11346       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11347   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11348   CombineTo(N0.getNode(), Trunc, NewChain);
11349   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11350 }
11351 
11352 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11353 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11354 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11355   assert(N->getOpcode() == ISD::ZERO_EXTEND);
11356   EVT VT = N->getValueType(0);
11357   EVT OrigVT = N->getOperand(0).getValueType();
11358   if (TLI.isZExtFree(OrigVT, VT))
11359     return SDValue();
11360 
11361   // and/or/xor
11362   SDValue N0 = N->getOperand(0);
11363   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11364         N0.getOpcode() == ISD::XOR) ||
11365       N0.getOperand(1).getOpcode() != ISD::Constant ||
11366       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11367     return SDValue();
11368 
11369   // shl/shr
11370   SDValue N1 = N0->getOperand(0);
11371   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11372       N1.getOperand(1).getOpcode() != ISD::Constant ||
11373       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11374     return SDValue();
11375 
11376   // load
11377   if (!isa<LoadSDNode>(N1.getOperand(0)))
11378     return SDValue();
11379   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11380   EVT MemVT = Load->getMemoryVT();
11381   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11382       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

11386   // If the shift op is SHL, the logic op must be AND, otherwise the result
11387   // will be wrong.
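  // With SHL, bits shifted above the narrow width are dropped before the
  // zext but survive the wide shift after a zextload; an AND against the
  // zero-extended mask clears those extra bits, while OR/XOR would keep
  // them. Illustrative i8->i16 case with x = 0xFF:
  //   zext (or (shl x, 4), 1)            == 0x00F1
  //   or (shl (zextload x), 4), (zext 1) == 0x0FF1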
11388   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11389     return SDValue();
11390 
11391   if (!N0.hasOneUse() || !N1.hasOneUse())
11392     return SDValue();
11393 
11394   SmallVector<SDNode*, 4> SetCCs;
11395   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11396                                ISD::ZERO_EXTEND, SetCCs, TLI))
11397     return SDValue();
11398 
11399   // Actually do the transformation.
11400   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11401                                    Load->getChain(), Load->getBasePtr(),
11402                                    Load->getMemoryVT(), Load->getMemOperand());
11403 
11404   SDLoc DL1(N1);
11405   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11406                               N1.getOperand(1));
11407 
11408   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11409   SDLoc DL0(N0);
11410   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11411                             DAG.getConstant(Mask, DL0, VT));
11412 
11413   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11414   CombineTo(N, And);
11415   if (SDValue(Load, 0).hasOneUse()) {
11416     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11417   } else {
11418     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11419                                 Load->getValueType(0), ExtLoad);
11420     CombineTo(Load, Trunc, ExtLoad.getValue(1));
11421   }
11422 
11423   // N0 is dead at this point.
11424   recursivelyDeleteUnusedNodes(N0.getNode());
11425 
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
11427 }
11428 
11429 /// If we're narrowing or widening the result of a vector select and the final
11430 /// size is the same size as a setcc (compare) feeding the select, then try to
11431 /// apply the cast operation to the select's operands because matching vector
11432 /// sizes for a select condition and other operands should be more efficient.
11433 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11434   unsigned CastOpcode = Cast->getOpcode();
11435   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11436           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11437           CastOpcode == ISD::FP_ROUND) &&
11438          "Unexpected opcode for vector select narrowing/widening");
11439 
11440   // We only do this transform before legal ops because the pattern may be
11441   // obfuscated by target-specific operations after legalization. Do not create
11442   // an illegal select op, however, because that may be difficult to lower.
11443   EVT VT = Cast->getValueType(0);
11444   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11445     return SDValue();
11446 
11447   SDValue VSel = Cast->getOperand(0);
11448   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11449       VSel.getOperand(0).getOpcode() != ISD::SETCC)
11450     return SDValue();
11451 
11452   // Does the setcc have the same vector size as the casted select?
11453   SDValue SetCC = VSel.getOperand(0);
11454   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11455   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11456     return SDValue();
11457 
11458   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11459   SDValue A = VSel.getOperand(1);
11460   SDValue B = VSel.getOperand(2);
11461   SDValue CastA, CastB;
11462   SDLoc DL(Cast);
11463   if (CastOpcode == ISD::FP_ROUND) {
11464     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11465     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11466     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11467   } else {
11468     CastA = DAG.getNode(CastOpcode, DL, VT, A);
11469     CastB = DAG.getNode(CastOpcode, DL, VT, B);
11470   }
11471   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11472 }
11473 
11474 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11475 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11476 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11477                                      const TargetLowering &TLI, EVT VT,
11478                                      bool LegalOperations, SDNode *N,
11479                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
11480   SDNode *N0Node = N0.getNode();
11481   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11482                                                    : ISD::isZEXTLoad(N0Node);
11483   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11484       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11485     return SDValue();
11486 
11487   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11488   EVT MemVT = LN0->getMemoryVT();
11489   if ((LegalOperations || !LN0->isSimple() ||
11490        VT.isVector()) &&
11491       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11492     return SDValue();
11493 
11494   SDValue ExtLoad =
11495       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11496                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11497   Combiner.CombineTo(N, ExtLoad);
11498   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11499   if (LN0->use_empty())
11500     Combiner.recursivelyDeleteUnusedNodes(LN0);
11501   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11502 }
11503 
11504 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11505 // Only generate vector extloads when 1) they're legal, and 2) they are
11506 // deemed desirable by the target.
11507 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11508                                   const TargetLowering &TLI, EVT VT,
11509                                   bool LegalOperations, SDNode *N, SDValue N0,
11510                                   ISD::LoadExtType ExtLoadType,
11511                                   ISD::NodeType ExtOpc) {
11512   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11513       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11514       ((LegalOperations || VT.isVector() ||
11515         !cast<LoadSDNode>(N0)->isSimple()) &&
11516        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11517     return {};
11518 
11519   bool DoXform = true;
11520   SmallVector<SDNode *, 4> SetCCs;
11521   if (!N0.hasOneUse())
11522     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11523   if (VT.isVector())
11524     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11525   if (!DoXform)
11526     return {};
11527 
11528   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11529   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11530                                    LN0->getBasePtr(), N0.getValueType(),
11531                                    LN0->getMemOperand());
11532   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11533   // If the load value is used only by N, replace it via CombineTo N.
11534   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11535   Combiner.CombineTo(N, ExtLoad);
11536   if (NoReplaceTrunc) {
11537     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11538     Combiner.recursivelyDeleteUnusedNodes(LN0);
11539   } else {
11540     SDValue Trunc =
11541         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11542     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11543   }
11544   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11545 }
11546 
11547 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11548                                         const TargetLowering &TLI, EVT VT,
11549                                         SDNode *N, SDValue N0,
11550                                         ISD::LoadExtType ExtLoadType,
11551                                         ISD::NodeType ExtOpc) {
11552   if (!N0.hasOneUse())
11553     return SDValue();
11554 
11555   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11556   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11557     return SDValue();
11558 
11559   if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11560     return SDValue();
11561 
11562   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11563     return SDValue();
11564 
11565   SDLoc dl(Ld);
11566   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11567   SDValue NewLoad = DAG.getMaskedLoad(
11568       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11569       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11570       ExtLoadType, Ld->isExpandingLoad());
11571   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11572   return NewLoad;
11573 }
11574 
11575 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11576                                        bool LegalOperations) {
11577   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11578           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11579 
11580   SDValue SetCC = N->getOperand(0);
11581   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11582       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11583     return SDValue();
11584 
11585   SDValue X = SetCC.getOperand(0);
11586   SDValue Ones = SetCC.getOperand(1);
11587   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11588   EVT VT = N->getValueType(0);
11589   EVT XVT = X.getValueType();
11590   // setge X, C is canonicalized to setgt, so we do not need to match that
11591   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11592   // not require the 'not' op.
11593   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11594     // Invert and smear/shift the sign bit:
11595     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11596     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
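    // e.g. for i32: sext i1 (setgt X, -1) is -1 iff X is non-negative,
    // which is exactly sra (not X), 31.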
11597     SDLoc DL(N);
11598     unsigned ShCt = VT.getSizeInBits() - 1;
11599     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11600     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
11601       SDValue NotX = DAG.getNOT(DL, X, VT);
11602       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
11603       auto ShiftOpcode =
11604         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
11605       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
11606     }
11607   }
11608   return SDValue();
11609 }
11610 
11611 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
11612   SDValue N0 = N->getOperand(0);
11613   if (N0.getOpcode() != ISD::SETCC)
11614     return SDValue();
11615 
11616   SDValue N00 = N0.getOperand(0);
11617   SDValue N01 = N0.getOperand(1);
11618   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11619   EVT VT = N->getValueType(0);
11620   EVT N00VT = N00.getValueType();
11621   SDLoc DL(N);
11622 
11623   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
11624   // the same size as the compared operands. Try to optimize sext(setcc())
11625   // if this is the case.
11626   if (VT.isVector() && !LegalOperations &&
11627       TLI.getBooleanContents(N00VT) ==
11628           TargetLowering::ZeroOrNegativeOneBooleanContent) {
11629     EVT SVT = getSetCCResultType(N00VT);
11630 
11631     // If we already have the desired type, don't change it.
11632     if (SVT != N0.getValueType()) {
      // We know that the # elements of the result is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
11638       if (VT.getSizeInBits() == SVT.getSizeInBits())
11639         return DAG.getSetCC(DL, VT, N00, N01, CC);
11640 
11641       // If the desired elements are smaller or larger than the source
11642       // elements, we can use a matching integer vector type and then
11643       // truncate/sign extend.
11644       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
11645       if (SVT == MatchingVecType) {
11646         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
11647         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
11648       }
11649     }
11650 
11651     // Try to eliminate the sext of a setcc by zexting the compare operands.
11652     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
11653         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
11654       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
11655       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11656       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
11657 
11658       // We have an unsupported narrow vector compare op that would be legal
11659       // if extended to the destination type. See if the compare operands
11660       // can be freely extended to the destination type.
11661       auto IsFreeToExtend = [&](SDValue V) {
11662         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
11663           return true;
11664         // Match a simple, non-extended load that can be converted to a
11665         // legal {z/s}ext-load.
11666         // TODO: Allow widening of an existing {z/s}ext-load?
11667         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
11668               ISD::isUNINDEXEDLoad(V.getNode()) &&
11669               cast<LoadSDNode>(V)->isSimple() &&
11670               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
11671           return false;
11672 
11673         // Non-chain users of this value must either be the setcc in this
11674         // sequence or extends that can be folded into the new {z/s}ext-load.
11675         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
11676              UI != UE; ++UI) {
11677           // Skip uses of the chain and the setcc.
11678           SDNode *User = *UI;
11679           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
11680             continue;
11681           // Extra users must have exactly the same cast we are about to create.
11682           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
11683           //       is enhanced similarly.
11684           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
11685             return false;
11686         }
11687         return true;
11688       };
11689 
11690       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
11691         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
11692         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
11693         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
11694       }
11695     }
11696   }
11697 
11698   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
11699   // Here, T can be 1 or -1, depending on the type of the setcc and
11700   // getBooleanContents().
11701   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
11702 
11703   // To determine the "true" side of the select, we need to know the high bit
11704   // of the value returned by the setcc if it evaluates to true.
11705   // If the type of the setcc is i1, then the true case of the select is just
11706   // sext(i1 1), that is, -1.
11707   // If the type of the setcc is larger (say, i8) then the value of the high
11708   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
11709   // of the appropriate width.
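  // e.g. for an i1 setcc: sext (setcc X, Y, CC) --> select (setcc X, Y, CC),
  // -1, 0.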
11710   SDValue ExtTrueVal = (SetCCWidth == 1)
11711                            ? DAG.getAllOnesConstant(DL, VT)
11712                            : DAG.getBoolConstant(true, DL, VT, N00VT);
11713   SDValue Zero = DAG.getConstant(0, DL, VT);
11714   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
11715     return SCC;
11716 
11717   if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
11718     EVT SetCCVT = getSetCCResultType(N00VT);
11719     // Don't do this transform for i1 because there's a select transform
11720     // that would reverse it.
11721     // TODO: We should not do this transform at all without a target hook
11722     // because a sext is likely cheaper than a select?
11723     if (SetCCVT.getScalarSizeInBits() != 1 &&
11724         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
11725       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
11726       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
11727     }
11728   }
11729 
11730   return SDValue();
11731 }
11732 
11733 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
11734   SDValue N0 = N->getOperand(0);
11735   EVT VT = N->getValueType(0);
11736   SDLoc DL(N);
11737 
11738   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11739     return Res;
11740 
11741   // fold (sext (sext x)) -> (sext x)
11742   // fold (sext (aext x)) -> (sext x)
11743   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11744     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
11745 
11746   if (N0.getOpcode() == ISD::TRUNCATE) {
11747     // fold (sext (truncate (load x))) -> (sext (smaller load x))
11748     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
11749     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
11750       SDNode *oye = N0.getOperand(0).getNode();
11751       if (NarrowLoad.getNode() != N0.getNode()) {
11752         CombineTo(N0.getNode(), NarrowLoad);
11753         // CombineTo deleted the truncate, if needed, but not what's under it.
11754         AddToWorklist(oye);
11755       }
11756       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11757     }
11758 
11759     // See if the value being truncated is already sign extended.  If so, just
11760     // eliminate the trunc/sext pair.
11761     SDValue Op = N0.getOperand(0);
11762     unsigned OpBits   = Op.getScalarValueSizeInBits();
11763     unsigned MidBits  = N0.getScalarValueSizeInBits();
11764     unsigned DestBits = VT.getScalarSizeInBits();
11765     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
11766 
11767     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already sign-extended as required and can be used as-is.
11770       if (NumSignBits > DestBits-MidBits)
11771         return Op;
11772     } else if (OpBits < DestBits) {
11773       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
11774       // bits, just sext from i32.
11775       if (NumSignBits > OpBits-MidBits)
11776         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
11777     } else {
11778       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
11779       // bits, just truncate to i32.
11780       if (NumSignBits > OpBits-MidBits)
11781         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
11782     }
11783 
11784     // fold (sext (truncate x)) -> (sextinreg x).
11785     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
11786                                                  N0.getValueType())) {
11787       if (OpBits < DestBits)
11788         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
11789       else if (OpBits > DestBits)
11790         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
11791       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
11792                          DAG.getValueType(N0.getValueType()));
11793     }
11794   }
11795 
11796   // Try to simplify (sext (load x)).
11797   if (SDValue foldedExt =
11798           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11799                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
11800     return foldedExt;
11801 
11802   if (SDValue foldedExt =
11803       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
11804                                ISD::SIGN_EXTEND))
11805     return foldedExt;
11806 
11807   // fold (sext (load x)) to multiple smaller sextloads.
11808   // Only on illegal but splittable vectors.
11809   if (SDValue ExtLoad = CombineExtLoad(N))
11810     return ExtLoad;
11811 
11812   // Try to simplify (sext (sextload x)).
11813   if (SDValue foldedExt = tryToFoldExtOfExtload(
11814           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
11815     return foldedExt;
11816 
11817   // fold (sext (and/or/xor (load x), cst)) ->
11818   //      (and/or/xor (sextload x), (sext cst))
11819   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11820        N0.getOpcode() == ISD::XOR) &&
11821       isa<LoadSDNode>(N0.getOperand(0)) &&
11822       N0.getOperand(1).getOpcode() == ISD::Constant &&
11823       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
11824     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
11825     EVT MemVT = LN00->getMemoryVT();
11826     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
11827       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
11828       SmallVector<SDNode*, 4> SetCCs;
11829       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
11830                                              ISD::SIGN_EXTEND, SetCCs, TLI);
11831       if (DoXform) {
11832         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
11833                                          LN00->getChain(), LN00->getBasePtr(),
11834                                          LN00->getMemoryVT(),
11835                                          LN00->getMemOperand());
11836         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
11837         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
11838                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
11839         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
11840         bool NoReplaceTruncAnd = !N0.hasOneUse();
11841         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
11842         CombineTo(N, And);
11843         // If N0 has multiple uses, change other uses as well.
11844         if (NoReplaceTruncAnd) {
11845           SDValue TruncAnd =
11846               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
11847           CombineTo(N0.getNode(), TruncAnd);
11848         }
11849         if (NoReplaceTrunc) {
11850           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
11851         } else {
11852           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
11853                                       LN00->getValueType(0), ExtLoad);
11854           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
11855         }
11856         return SDValue(N,0); // Return N so it doesn't get rechecked!
11857       }
11858     }
11859   }
11860 
11861   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
11862     return V;
11863 
11864   if (SDValue V = foldSextSetcc(N))
11865     return V;
11866 
11867   // fold (sext x) -> (zext x) if the sign bit is known zero.
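  // For example, (sext (i8 (and X, 0x7f)) to i32) can become
  // (zext (i8 (and X, 0x7f)) to i32) because bit 7 is known zero.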
11868   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
11869       DAG.SignBitIsZero(N0))
11870     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
11871 
11872   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11873     return NewVSel;
11874 
11875   // Eliminate this sign extend by doing a negation in the destination type:
11876   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
11877   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
11878       isNullOrNullSplat(N0.getOperand(0)) &&
11879       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
11880       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
11881     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
11882     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
11883   }
11884   // Eliminate this sign extend by doing a decrement in the destination type:
11885   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
11886   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
11887       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
11888       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11889       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
11890     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
11891     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11892   }
11893 
11894   // fold sext (not i1 X) -> add (zext i1 X), -1
11895   // TODO: This could be extended to handle bool vectors.
11896   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
11897       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
11898                             TLI.isOperationLegal(ISD::ADD, VT)))) {
    // If we can eliminate the 'not', the sext form should be better.
11900     if (SDValue NewXor = visitXOR(N0.getNode())) {
11901       // Returning N0 is a form of in-visit replacement that may have
11902       // invalidated N0.
11903       if (NewXor.getNode() == N0.getNode()) {
11904         // Return SDValue here as the xor should have already been replaced in
11905         // this sext.
11906         return SDValue();
11907       } else {
11908         // Return a new sext with the new xor.
11909         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
11910       }
11911     }
11912 
11913     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
11914     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
11915   }
11916 
11917   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
11918     return Res;
11919 
11920   return SDValue();
11921 }
11922 
// isTruncateOf - If N is a truncate of some other value, return true and
// record the value being truncated in Op and which of Op's bits are zero/one
// in Known. This function computes KnownBits to avoid a duplicated call to
// computeKnownBits in the caller.
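// For example, (setcc X, 0, ne) where X is known to be either 0 or 1 is
// treated as a truncate of X to i1.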
11927 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
11928                          KnownBits &Known) {
11929   if (N->getOpcode() == ISD::TRUNCATE) {
11930     Op = N->getOperand(0);
11931     Known = DAG.computeKnownBits(Op);
11932     return true;
11933   }
11934 
11935   if (N.getOpcode() != ISD::SETCC ||
11936       N.getValueType().getScalarType() != MVT::i1 ||
11937       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
11938     return false;
11939 
11940   SDValue Op0 = N->getOperand(0);
11941   SDValue Op1 = N->getOperand(1);
11942   assert(Op0.getValueType() == Op1.getValueType());
11943 
11944   if (isNullOrNullSplat(Op0))
11945     Op = Op1;
11946   else if (isNullOrNullSplat(Op1))
11947     Op = Op0;
11948   else
11949     return false;
11950 
11951   Known = DAG.computeKnownBits(Op);
11952 
11953   return (Known.Zero | 1).isAllOnes();
11954 }
11955 
11956 /// Given an extending node with a pop-count operand, if the target does not
11957 /// support a pop-count in the narrow source type but does support it in the
11958 /// destination type, widen the pop-count to the destination type.
11959 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
11960   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
11961           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
11962 
11963   SDValue CtPop = Extend->getOperand(0);
11964   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
11965     return SDValue();
11966 
11967   EVT VT = Extend->getValueType(0);
11968   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11969   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
11970       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
11971     return SDValue();
11972 
11973   // zext (ctpop X) --> ctpop (zext X)
11974   SDLoc DL(Extend);
11975   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
11976   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
11977 }
11978 
11979 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
11980   SDValue N0 = N->getOperand(0);
11981   EVT VT = N->getValueType(0);
11982 
11983   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11984     return Res;
11985 
11986   // fold (zext (zext x)) -> (zext x)
11987   // fold (zext (aext x)) -> (zext x)
11988   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
11989     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
11990                        N0.getOperand(0));
11991 
11992   // fold (zext (truncate x)) -> (zext x) or
11993   //      (zext (truncate x)) -> (truncate x)
11994   // This is valid when the truncated bits of x are already zero.
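  // For example, if X is i32 and its upper 24 bits are known zero, then
  // (zext (trunc X to i8) to i32) folds to X itself.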
11995   SDValue Op;
11996   KnownBits Known;
11997   if (isTruncateOf(DAG, N0, Op, Known)) {
11998     APInt TruncatedBits =
11999       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
12000       APInt(Op.getScalarValueSizeInBits(), 0) :
12001       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
12002                         N0.getScalarValueSizeInBits(),
12003                         std::min(Op.getScalarValueSizeInBits(),
12004                                  VT.getScalarSizeInBits()));
12005     if (TruncatedBits.isSubsetOf(Known.Zero))
12006       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12007   }
12008 
12009   // fold (zext (truncate x)) -> (and x, mask)
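  // For example, (zext (trunc (i32 X) to i8) to i32) -> (and X, 255).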
12010   if (N0.getOpcode() == ISD::TRUNCATE) {
12011     // fold (zext (truncate (load x))) -> (zext (smaller load x))
12012     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
12013     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12014       SDNode *oye = N0.getOperand(0).getNode();
12015       if (NarrowLoad.getNode() != N0.getNode()) {
12016         CombineTo(N0.getNode(), NarrowLoad);
12017         // CombineTo deleted the truncate, if needed, but not what's under it.
12018         AddToWorklist(oye);
12019       }
12020       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12021     }
12022 
12023     EVT SrcVT = N0.getOperand(0).getValueType();
12024     EVT MinVT = N0.getValueType();
12025 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
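    // For example, for (zext (trunc (v4i32 X) to v4i8) to v4i64), the mask
    // can be applied as a v4i32 AND before extending, rather than as a v4i64
    // AND that may be split across several registers.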
12028     if (SrcVT.bitsLT(VT) && VT.isVector()) {
12029       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
12030                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
12031         SDValue Op = N0.getOperand(0);
12032         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12033         AddToWorklist(Op.getNode());
12034         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
12035         // Transfer the debug info; the new node is equivalent to N0.
12036         DAG.transferDbgValues(N0, ZExtOrTrunc);
12037         return ZExtOrTrunc;
12038       }
12039     }
12040 
12041     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
12042       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12043       AddToWorklist(Op.getNode());
12044       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
12045       // We may safely transfer the debug info describing the truncate node over
12046       // to the equivalent and operation.
12047       DAG.transferDbgValues(N0, And);
12048       return And;
12049     }
12050   }
12051 
12052   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
12053   // if either of the casts is not free.
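  // For example, (zext (and (trunc (i64 X) to i32), 7) to i64) -> (and X, 7)
  // when one of the i64 <-> i32 casts is not free.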
12054   if (N0.getOpcode() == ISD::AND &&
12055       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12056       N0.getOperand(1).getOpcode() == ISD::Constant &&
12057       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12058                            N0.getValueType()) ||
12059        !TLI.isZExtFree(N0.getValueType(), VT))) {
12060     SDValue X = N0.getOperand(0).getOperand(0);
12061     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
12062     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12063     SDLoc DL(N);
12064     return DAG.getNode(ISD::AND, DL, VT,
12065                        X, DAG.getConstant(Mask, DL, VT));
12066   }
12067 
12068   // Try to simplify (zext (load x)).
12069   if (SDValue foldedExt =
12070           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12071                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12072     return foldedExt;
12073 
12074   if (SDValue foldedExt =
12075       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
12076                                ISD::ZERO_EXTEND))
12077     return foldedExt;
12078 
12079   // fold (zext (load x)) to multiple smaller zextloads.
12080   // Only on illegal but splittable vectors.
12081   if (SDValue ExtLoad = CombineExtLoad(N))
12082     return ExtLoad;
12083 
12084   // fold (zext (and/or/xor (load x), cst)) ->
12085   //      (and/or/xor (zextload x), (zext cst))
12086   // Unless (and (load x) cst) will match as a zextload already and has
12087   // additional users.
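  // For example, assuming the i32 zextload from i16 is legal:
  //   (zext (and (i16 (load X)), 3) to i32)
  //     -> (and (i32 (zextload X)), 3)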
12088   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
12089        N0.getOpcode() == ISD::XOR) &&
12090       isa<LoadSDNode>(N0.getOperand(0)) &&
12091       N0.getOperand(1).getOpcode() == ISD::Constant &&
12092       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
12093     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
12094     EVT MemVT = LN00->getMemoryVT();
12095     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
12096         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
12097       bool DoXform = true;
12098       SmallVector<SDNode*, 4> SetCCs;
12099       if (!N0.hasOneUse()) {
12100         if (N0.getOpcode() == ISD::AND) {
12101           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
12102           EVT LoadResultTy = AndC->getValueType(0);
12103           EVT ExtVT;
12104           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
12105             DoXform = false;
12106         }
12107       }
12108       if (DoXform)
12109         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
12110                                           ISD::ZERO_EXTEND, SetCCs, TLI);
12111       if (DoXform) {
12112         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
12113                                          LN00->getChain(), LN00->getBasePtr(),
12114                                          LN00->getMemoryVT(),
12115                                          LN00->getMemOperand());
12116         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
12117         SDLoc DL(N);
12118         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
12119                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
12120         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
12121         bool NoReplaceTruncAnd = !N0.hasOneUse();
12122         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
12123         CombineTo(N, And);
12124         // If N0 has multiple uses, change other uses as well.
12125         if (NoReplaceTruncAnd) {
12126           SDValue TruncAnd =
12127               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
12128           CombineTo(N0.getNode(), TruncAnd);
12129         }
12130         if (NoReplaceTrunc) {
12131           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
12132         } else {
12133           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
12134                                       LN00->getValueType(0), ExtLoad);
12135           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
12136         }
12137         return SDValue(N,0); // Return N so it doesn't get rechecked!
12138       }
12139     }
12140   }
12141 
12142   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12143   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12144   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
12145     return ZExtLoad;
12146 
12147   // Try to simplify (zext (zextload x)).
12148   if (SDValue foldedExt = tryToFoldExtOfExtload(
12149           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
12150     return foldedExt;
12151 
12152   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
12153     return V;
12154 
12155   if (N0.getOpcode() == ISD::SETCC) {
12156     // Only do this before legalize for now.
12157     if (!LegalOperations && VT.isVector() &&
12158         N0.getValueType().getVectorElementType() == MVT::i1) {
12159       EVT N00VT = N0.getOperand(0).getValueType();
12160       if (getSetCCResultType(N00VT) == N0.getValueType())
12161         return SDValue();
12162 
      // We know that the # elements of the result is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the zext'd result matches the element size of
      // the compare operands.
12168       SDLoc DL(N);
12169       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
12170         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
12171         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
12172                                      N0.getOperand(1), N0.getOperand(2));
12173         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
12174       }
12175 
12176       // If the desired elements are smaller or larger than the source
12177       // elements we can use a matching integer vector type and then
12178       // truncate/any extend followed by zext_in_reg.
12179       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12180       SDValue VsetCC =
12181           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
12182                       N0.getOperand(1), N0.getOperand(2));
12183       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
12184                                     N0.getValueType());
12185     }
12186 
    // zext(setcc x, y, cc) -> zext(select_cc x, y, true, false, cc)
12188     SDLoc DL(N);
12189     EVT N0VT = N0.getValueType();
12190     EVT N00VT = N0.getOperand(0).getValueType();
12191     if (SDValue SCC = SimplifySelectCC(
12192             DL, N0.getOperand(0), N0.getOperand(1),
12193             DAG.getBoolConstant(true, DL, N0VT, N00VT),
12194             DAG.getBoolConstant(false, DL, N0VT, N00VT),
12195             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12196       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
12197   }
12198 
12199   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
12200   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
12201       isa<ConstantSDNode>(N0.getOperand(1)) &&
12202       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12203       N0.hasOneUse()) {
12204     SDValue ShAmt = N0.getOperand(1);
12205     if (N0.getOpcode() == ISD::SHL) {
12206       SDValue InnerZExt = N0.getOperand(0);
12207       // If the original shl may be shifting out bits, do not perform this
12208       // transformation.
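      // For example, with (shl (zext i8 X to i16), 9), bit 7 of X would be
      // shifted out of the i16 result, so the fold would be unsound.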
12209       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
12210         InnerZExt.getOperand(0).getValueSizeInBits();
12211       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
12212         return SDValue();
12213     }
12214 
12215     SDLoc DL(N);
12216 
12217     // Ensure that the shift amount is wide enough for the shifted value.
12218     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
12219       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
12220 
12221     return DAG.getNode(N0.getOpcode(), DL, VT,
12222                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
12223                        ShAmt);
12224   }
12225 
12226   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12227     return NewVSel;
12228 
12229   if (SDValue NewCtPop = widenCtPop(N, DAG))
12230     return NewCtPop;
12231 
12232   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12233     return Res;
12234 
12235   return SDValue();
12236 }
12237 
12238 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
12239   SDValue N0 = N->getOperand(0);
12240   EVT VT = N->getValueType(0);
12241 
12242   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12243     return Res;
12244 
12245   // fold (aext (aext x)) -> (aext x)
12246   // fold (aext (zext x)) -> (zext x)
12247   // fold (aext (sext x)) -> (sext x)
12248   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
12249       N0.getOpcode() == ISD::ZERO_EXTEND ||
12250       N0.getOpcode() == ISD::SIGN_EXTEND)
12251     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
12252 
12253   // fold (aext (truncate (load x))) -> (aext (smaller load x))
12254   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
12255   if (N0.getOpcode() == ISD::TRUNCATE) {
12256     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
12257       SDNode *oye = N0.getOperand(0).getNode();
12258       if (NarrowLoad.getNode() != N0.getNode()) {
12259         CombineTo(N0.getNode(), NarrowLoad);
12260         // CombineTo deleted the truncate, if needed, but not what's under it.
12261         AddToWorklist(oye);
12262       }
12263       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12264     }
12265   }
12266 
12267   // fold (aext (truncate x))
12268   if (N0.getOpcode() == ISD::TRUNCATE)
12269     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
12270 
12271   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
12272   // if the trunc is not free.
12273   if (N0.getOpcode() == ISD::AND &&
12274       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
12275       N0.getOperand(1).getOpcode() == ISD::Constant &&
12276       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
12277                           N0.getValueType())) {
12278     SDLoc DL(N);
12279     SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
12280     SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
12281     assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
12282     return DAG.getNode(ISD::AND, DL, VT, X, Y);
12283   }
12284 
12285   // fold (aext (load x)) -> (aext (truncate (extload x)))
12286   // None of the supported targets knows how to perform load and any_ext
12287   // on vectors in one instruction, so attempt to fold to zext instead.
12288   if (VT.isVector()) {
12289     // Try to simplify (zext (load x)).
12290     if (SDValue foldedExt =
12291             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
12292                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
12293       return foldedExt;
12294   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
12295              ISD::isUNINDEXEDLoad(N0.getNode()) &&
12296              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12297     bool DoXform = true;
12298     SmallVector<SDNode *, 4> SetCCs;
12299     if (!N0.hasOneUse())
12300       DoXform =
12301           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
12302     if (DoXform) {
12303       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12304       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12305                                        LN0->getChain(), LN0->getBasePtr(),
12306                                        N0.getValueType(), LN0->getMemOperand());
12307       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
12308       // If the load value is used only by N, replace it via CombineTo N.
12309       bool NoReplaceTrunc = N0.hasOneUse();
12310       CombineTo(N, ExtLoad);
12311       if (NoReplaceTrunc) {
12312         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12313         recursivelyDeleteUnusedNodes(LN0);
12314       } else {
12315         SDValue Trunc =
12316             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
12317         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
12318       }
12319       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12320     }
12321   }
12322 
12323   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
12324   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
12325   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
12326   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
12327       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
12328     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12329     ISD::LoadExtType ExtType = LN0->getExtensionType();
12330     EVT MemVT = LN0->getMemoryVT();
12331     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
12332       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
12333                                        VT, LN0->getChain(), LN0->getBasePtr(),
12334                                        MemVT, LN0->getMemOperand());
12335       CombineTo(N, ExtLoad);
12336       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
12337       recursivelyDeleteUnusedNodes(LN0);
12338       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12339     }
12340   }
12341 
12342   if (N0.getOpcode() == ISD::SETCC) {
12343     // For vectors:
12344     // aext(setcc) -> vsetcc
12345     // aext(setcc) -> truncate(vsetcc)
12346     // aext(setcc) -> aext(vsetcc)
12347     // Only do this before legalize for now.
12348     if (VT.isVector() && !LegalOperations) {
12349       EVT N00VT = N0.getOperand(0).getValueType();
12350       if (getSetCCResultType(N00VT) == N0.getValueType())
12351         return SDValue();
12352 
      // We know that the # elements of the result is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the extended result matches the
      // element size of the compare operands.
12358       if (VT.getSizeInBits() == N00VT.getSizeInBits())
12359         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
12360                              N0.getOperand(1),
12361                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
12362 
12363       // If the desired elements are smaller or larger than the source
12364       // elements we can use a matching integer vector type and then
12365       // truncate/any extend
12366       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
12367       SDValue VsetCC =
12368         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
12369                       N0.getOperand(1),
12370                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
12371       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
12372     }
12373 
12374     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
12375     SDLoc DL(N);
12376     if (SDValue SCC = SimplifySelectCC(
12377             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
12378             DAG.getConstant(0, DL, VT),
12379             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
12380       return SCC;
12381   }
12382 
12383   if (SDValue NewCtPop = widenCtPop(N, DAG))
12384     return NewCtPop;
12385 
12386   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
12387     return Res;
12388 
12389   return SDValue();
12390 }
12391 
12392 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
12393   unsigned Opcode = N->getOpcode();
12394   SDValue N0 = N->getOperand(0);
12395   SDValue N1 = N->getOperand(1);
12396   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
12397 
12398   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
12399   if (N0.getOpcode() == Opcode &&
12400       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
12401     return N0;
12402 
12403   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12404       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // on the larger source type, using the smaller of the two asserted types.
    // This eliminates the later assert:
12408     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
12409     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
12410     SDValue BigA = N0.getOperand(0);
12411     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12412     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12413            "Asserting zero/sign-extended bits to a type larger than the "
12414            "truncated destination does not provide information");
12415 
12416     SDLoc DL(N);
12417     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
12418     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
12419     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12420                                     BigA.getOperand(0), MinAssertVTVal);
12421     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12422   }
12423 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
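  // For example:
  //   (AssertZext (trunc (AssertSext X, i16)), i8)
  //     -> (trunc (AssertZext X, i8))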
12427   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
12428       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
12429       Opcode == ISD::AssertZext) {
12430     SDValue BigA = N0.getOperand(0);
12431     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
12432     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
12433            "Asserting zero/sign-extended bits to a type larger than the "
12434            "truncated destination does not provide information");
12435 
12436     if (AssertVT.bitsLT(BigA_AssertVT)) {
12437       SDLoc DL(N);
12438       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
12439                                       BigA.getOperand(0), N1);
12440       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
12441     }
12442   }
12443 
12444   return SDValue();
12445 }
12446 
12447 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
12448   SDLoc DL(N);
12449 
12450   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
12451   SDValue N0 = N->getOperand(0);
12452 
12453   // Fold (assertalign (assertalign x, AL0), AL1) ->
12454   // (assertalign x, max(AL0, AL1))
12455   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
12456     return DAG.getAssertAlign(DL, N0.getOperand(0),
12457                               std::max(AL, AAN->getAlign()));
12458 
  // In rare cases, there are trivial arithmetic ops in source operands. Sink
  // this assert down to the source operands so that those arithmetic ops can
  // be exposed to DAG combining.
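  // For example, given (assertalign (add X, Y), 8) where X is known to be
  // 8-byte aligned, Y must also be 8-byte aligned, so this becomes
  // (add X, (assertalign Y, 8)).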
12462   switch (N0.getOpcode()) {
12463   default:
12464     break;
12465   case ISD::ADD:
12466   case ISD::SUB: {
12467     unsigned AlignShift = Log2(AL);
12468     SDValue LHS = N0.getOperand(0);
12469     SDValue RHS = N0.getOperand(1);
12470     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
12471     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
12472     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
12473       if (LHSAlignShift < AlignShift)
12474         LHS = DAG.getAssertAlign(DL, LHS, AL);
12475       if (RHSAlignShift < AlignShift)
12476         RHS = DAG.getAssertAlign(DL, RHS, AL);
12477       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
12478     }
12479     break;
12480   }
12481   }
12482 
12483   return SDValue();
12484 }
12485 
12486 /// If the result of a load is shifted/masked/truncated to an effectively
12487 /// narrower type, try to transform the load to a narrower type and/or
12488 /// use an extending load.
12489 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
12490   unsigned Opc = N->getOpcode();
12491 
12492   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
12493   SDValue N0 = N->getOperand(0);
12494   EVT VT = N->getValueType(0);
12495   EVT ExtVT = VT;
12496 
12497   // This transformation isn't valid for vector loads.
12498   if (VT.isVector())
12499     return SDValue();
12500 
  // The ShAmt variable is used to indicate that we've consumed a right
  // shift. I.e. we want to narrow the width of the load by not loading the
  // ShAmt least significant bits.
12504   unsigned ShAmt = 0;
12505   // A special case is when the least significant bits from the load are masked
12506   // away, but using an AND rather than a right shift. HasShiftedOffset is used
12507   // to indicate that the narrowed load should be left-shifted ShAmt bits to get
12508   // the result.
12509   bool HasShiftedOffset = false;
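  // For example, (and (i32 (load X)), 0xff00) can be done as an i8 zextload
  // of the byte holding bits 8-15, shifted left by 8.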
12510   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
12511   // extended to VT.
12512   if (Opc == ISD::SIGN_EXTEND_INREG) {
12513     ExtType = ISD::SEXTLOAD;
12514     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12515   } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
12516     // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
12517     // value, or it may be shifting a higher subword, half or byte into the
12518     // lowest bits.
12519 
12520     // Only handle shift with constant shift amount, and the shiftee must be a
12521     // load.
12522     auto *LN = dyn_cast<LoadSDNode>(N0);
12523     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
12524     if (!N1C || !LN)
12525       return SDValue();
    // If the shift amount is at least as large as the memory width then we're
    // not accessing any of the loaded bytes.
12528     ShAmt = N1C->getZExtValue();
12529     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
12530     if (MemoryWidth <= ShAmt)
12531       return SDValue();
12532     // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
12533     ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
12534     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12535     // If original load is a SEXTLOAD then we can't simply replace it by a
12536     // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
12537     // followed by a ZEXT, but that is not handled at the moment). Similarly if
12538     // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
12539     if ((LN->getExtensionType() == ISD::SEXTLOAD ||
12540          LN->getExtensionType() == ISD::ZEXTLOAD) &&
12541         LN->getExtensionType() != ExtType)
12542       return SDValue();
12543   } else if (Opc == ISD::AND) {
12544     // An AND with a constant mask is the same as a truncate + zero-extend.
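    // For example, (and (i32 (load X)), 255) is equivalent to
    // (zext (trunc (load X) to i8) to i32), i.e. an i8 zextload.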
12545     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
12546     if (!AndC)
12547       return SDValue();
12548 
12549     const APInt &Mask = AndC->getAPIntValue();
12550     unsigned ActiveBits = 0;
12551     if (Mask.isMask()) {
12552       ActiveBits = Mask.countTrailingOnes();
12553     } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
12554       HasShiftedOffset = true;
12555     } else
12556       return SDValue();
12557 
12558     ExtType = ISD::ZEXTLOAD;
12559     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
12560   }
12561 
12562   // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
12563   // a right shift. Here we redo some of those checks, to possibly adjust the
12564   // ExtVT even further based on "a masking AND". We could also end up here for
12565   // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
12566   // need to be done here as well.
12567   if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
12568     SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
    // check below? And maybe it is non-profitable to do the transform when the
    // SRL has multiple uses and we get here with Opc != ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc == ISD::SRL case?
12574     if (!SRL.hasOneUse())
12575       return SDValue();
12576 
12577     // Only handle shift with constant shift amount, and the shiftee must be a
12578     // load.
12579     auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
12580     auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
12581     if (!SRL1C || !LN)
12582       return SDValue();
12583 
    // If the shift amount is at least as large as the memory width then we're
    // not accessing any of the loaded bytes.  If the load was a zextload or
    // extload then the result of the shift+trunc is zero/undef (handled
    // elsewhere).
12587     ShAmt = SRL1C->getZExtValue();
12588     uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
12589     if (ShAmt >= MemoryWidth)
12590       return SDValue();
12591 
12592     // Because a SRL must be assumed to *need* to zero-extend the high bits
12593     // (as opposed to anyext the high bits), we can't combine the zextload
12594     // lowering of SRL and an sextload.
12595     if (LN->getExtensionType() == ISD::SEXTLOAD)
12596       return SDValue();
12597 
    // Avoid reading outside the memory accessed by the original load (which
    // could happen if we only adjusted the load base pointer by ShAmt).
    // Instead we try to narrow the load even further. The typical scenario
    // here is:
12601     //   (i64 (truncate (i96 (srl (load x), 64)))) ->
12602     //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
12603     if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
12604       // Don't replace sextload by zextload.
12605       if (ExtType == ISD::SEXTLOAD)
12606         return SDValue();
12607       // Narrow the load.
12608       ExtType = ISD::ZEXTLOAD;
12609       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
12610     }
12611 
12612     // If the SRL is only used by a masking AND, we may be able to adjust
12613     // the ExtVT to make the AND redundant.
12614     SDNode *Mask = *(SRL->use_begin());
12615     if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
12616         isa<ConstantSDNode>(Mask->getOperand(1))) {
12617       const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
12618       if (ShiftMask.isMask()) {
12619         EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
12620                                          ShiftMask.countTrailingOnes());
12621         // If the mask is smaller, recompute the type.
12622         if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
12623             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
12624           ExtVT = MaskedVT;
12625       }
12626     }
12627 
12628     N0 = SRL.getOperand(0);
12629   }
12630 
12631   // If the load is shifted left (and the result isn't shifted back right), we
12632   // can fold a truncate through the shift. The typical scenario is that N
12633   // points at a TRUNCATE here so the attempted fold is:
12634   //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
12635   // ShLeftAmt will indicate how much a narrowed load should be shifted left.
12636   unsigned ShLeftAmt = 0;
12637   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12638       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
12639     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
12640       ShLeftAmt = N01->getZExtValue();
12641       N0 = N0.getOperand(0);
12642     }
12643   }
12644 
12645   // If we haven't found a load, we can't narrow it.
12646   if (!isa<LoadSDNode>(N0))
12647     return SDValue();
12648 
12649   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12650   // Reducing the width of a volatile load is illegal.  For atomics, we may be
12651   // able to reduce the width provided we never widen again. (see D66309)
12652   if (!LN0->isSimple() ||
12653       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
12654     return SDValue();
12655 
12656   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
12657     unsigned LVTStoreBits =
12658         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
12659     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
12660     return LVTStoreBits - EVTStoreBits - ShAmt;
12661   };
12662 
12663   // We need to adjust the pointer to the load by ShAmt bits in order to load
12664   // the correct bytes.
12665   unsigned PtrAdjustmentInBits =
12666       DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
12667 
12668   uint64_t PtrOff = PtrAdjustmentInBits / 8;
12669   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
12670   SDLoc DL(LN0);
12671   // The original load itself didn't wrap, so an offset within it doesn't.
12672   SDNodeFlags Flags;
12673   Flags.setNoUnsignedWrap(true);
12674   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
12675                                             TypeSize::Fixed(PtrOff), DL, Flags);
12676   AddToWorklist(NewPtr.getNode());
12677 
12678   SDValue Load;
12679   if (ExtType == ISD::NON_EXTLOAD)
12680     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
12681                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
12682                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12683   else
12684     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
12685                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
12686                           NewAlign, LN0->getMemOperand()->getFlags(),
12687                           LN0->getAAInfo());
12688 
12689   // Replace the old load's chain with the new load's chain.
12690   WorklistRemover DeadNodes(*this);
12691   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12692 
12693   // Shift the result left, if we've swallowed a left shift.
12694   SDValue Result = Load;
12695   if (ShLeftAmt != 0) {
12696     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
12697     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
12698       ShImmTy = VT;
12699     // If the shift amount is as large as the result size (but, presumably,
12700     // no larger than the source) then the useful bits of the result are
12701     // zero; we can't simply return the shortened shift, because the result
12702     // of that operation is undefined.
12703     if (ShLeftAmt >= VT.getScalarSizeInBits())
12704       Result = DAG.getConstant(0, DL, VT);
12705     else
12706       Result = DAG.getNode(ISD::SHL, DL, VT,
12707                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
12708   }
12709 
12710   if (HasShiftedOffset) {
    // We're using a shifted mask, so the load now has an offset. This means
    // the data has been loaded into lower bytes of the register than it
    // otherwise would have been, so we need to shl the loaded data into the
    // correct position in the register.
12715     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
12716     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
12717     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
12718   }
12719 
12720   // Return the new loaded value.
12721   return Result;
12722 }
12723 
12724 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
12725   SDValue N0 = N->getOperand(0);
12726   SDValue N1 = N->getOperand(1);
12727   EVT VT = N->getValueType(0);
12728   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
12729   unsigned VTBits = VT.getScalarSizeInBits();
12730   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
12731 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
12733   if (N0.isUndef())
12734     return DAG.getConstant(0, SDLoc(N), VT);
12735 
12736   // fold (sext_in_reg c1) -> c1
12737   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
12738     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
12739 
12740   // If the input is already sign extended, just drop the extension.
12741   if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
12742     return N0;
12743 
12744   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
12745   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
12746       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
12747     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
12748                        N1);
12749 
12750   // fold (sext_in_reg (sext x)) -> (sext x)
12751   // fold (sext_in_reg (aext x)) -> (sext x)
12752   // if x is small enough or if we know that x has more than 1 sign bit and the
12753   // sign_extend_inreg is extending from one of them.
12754   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12755     SDValue N00 = N0.getOperand(0);
12756     unsigned N00Bits = N00.getScalarValueSizeInBits();
12757     if ((N00Bits <= ExtVTBits ||
12758          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12759         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12760       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12761   }
12762 
12763   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12764   // if x is small enough or if we know that x has more than 1 sign bit and the
12765   // sign_extend_inreg is extending from one of them.
12766   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12767       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12768       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12769     SDValue N00 = N0.getOperand(0);
12770     unsigned N00Bits = N00.getScalarValueSizeInBits();
12771     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12772     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12773     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12774     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
12775     if ((N00Bits == ExtVTBits ||
12776          (!IsZext && (N00Bits < ExtVTBits ||
12777                       DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
12778         (!LegalOperations ||
12779          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12780       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12781   }
12782 
12783   // fold (sext_in_reg (zext x)) -> (sext x)
12784   // iff we are extending the source sign bit.
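  // For example, (sext_in_reg (zext i8 X to i32), i8) -> (sext i8 X to i32).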
12785   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12786     SDValue N00 = N0.getOperand(0);
12787     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12788         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12790   }
12791 
12792   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
12793   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12794     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12795 
12796   // fold operands of sext_in_reg based on knowledge that the top bits are not
12797   // demanded.
12798   if (SimplifyDemandedBits(SDValue(N, 0)))
12799     return SDValue(N, 0);
12800 
12801   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12802   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12803   if (SDValue NarrowLoad = reduceLoadWidth(N))
12804     return NarrowLoad;
12805 
12806   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12807   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12808   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
12809   if (N0.getOpcode() == ISD::SRL) {
12810     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12811       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12812         // We can turn this into an SRA iff the input to the SRL is already sign
12813         // extended enough.
12814         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12815         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12816           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12817                              N0.getOperand(1));
12818       }
12819   }
12820 
12821   // fold (sext_inreg (extload x)) -> (sextload x)
12822   // If sextload is not supported by target, we can only do the combine when
12823   // load has one use. Doing otherwise can block folding the extload with other
12824   // extends that the target does support.
12825   if (ISD::isEXTLoad(N0.getNode()) &&
12826       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12827       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12828       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
12829         N0.hasOneUse()) ||
12830        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12831     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12832     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12833                                      LN0->getChain(),
12834                                      LN0->getBasePtr(), ExtVT,
12835                                      LN0->getMemOperand());
12836     CombineTo(N, ExtLoad);
12837     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12838     AddToWorklist(ExtLoad.getNode());
12839     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12840   }
12841 
12842   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
12843   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
12844       N0.hasOneUse() &&
12845       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12846       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
12847        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
12848     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12849     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
12850                                      LN0->getChain(),
12851                                      LN0->getBasePtr(), ExtVT,
12852                                      LN0->getMemOperand());
12853     CombineTo(N, ExtLoad);
12854     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12855     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12856   }
12857 
12858   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
12859   // ignore it if the masked load is already sign extended
12860   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
12861     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
12862         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
12863         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
12864       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
12865           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
12866           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
12867           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
12868       CombineTo(N, ExtMaskedLoad);
12869       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
12870       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12871     }
12872   }
12873 
12874   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
12875   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
12876     if (SDValue(GN0, 0).hasOneUse() &&
12877         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
12879       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
12880                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
12881 
12882       SDValue ExtLoad = DAG.getMaskedGather(
12883           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
12884           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
12885 
12886       CombineTo(N, ExtLoad);
12887       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
12888       AddToWorklist(ExtLoad.getNode());
12889       return SDValue(N, 0); // Return N so it doesn't get rechecked!
12890     }
12891   }
12892 
12893   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
12894   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
12895     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
12896                                            N0.getOperand(1), false))
12897       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
12898   }
12899 
12900   return SDValue();
12901 }
12902 
12903 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
12904   SDValue N0 = N->getOperand(0);
12905   EVT VT = N->getValueType(0);
12906 
12907   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
12908   if (N0.isUndef())
12909     return DAG.getConstant(0, SDLoc(N), VT);
12910 
12911   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12912     return Res;
12913 
12914   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12915     return SDValue(N, 0);
12916 
12917   return SDValue();
12918 }
12919 
12920 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12921   SDValue N0 = N->getOperand(0);
12922   EVT VT = N->getValueType(0);
12923   EVT SrcVT = N0.getValueType();
12924   bool isLE = DAG.getDataLayout().isLittleEndian();
12925 
12926   // noop truncate
12927   if (SrcVT == VT)
12928     return N0;
12929 
12930   // fold (truncate (truncate x)) -> (truncate x)
12931   if (N0.getOpcode() == ISD::TRUNCATE)
12932     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12933 
12934   // fold (truncate c1) -> c1
12935   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12936     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12937     if (C.getNode() != N)
12938       return C;
12939   }
12940 
12941   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12942   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12943       N0.getOpcode() == ISD::SIGN_EXTEND ||
12944       N0.getOpcode() == ISD::ANY_EXTEND) {
12945     // if the source is smaller than the dest, we still need an extend.
12946     if (N0.getOperand(0).getValueType().bitsLT(VT))
12947       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
12949     if (N0.getOperand(0).getValueType().bitsGT(VT))
12950       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12951     // if the source and dest are the same type, we can drop both the extend
12952     // and the truncate.
12953     return N0.getOperand(0);
12954   }
12955 
12956   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12957   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12958     return SDValue();
12959 
12960   // Fold extract-and-trunc into a narrow extract. For example:
12961   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12962   //   i32 y = TRUNCATE(i64 x)
12963   //        -- becomes --
12964   //   v16i8 b = BITCAST (v2i64 val)
12965   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12966   //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization, after which we
  // need to be more careful about the vector instructions that we generate.
12970   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12971       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12972     EVT VecTy = N0.getOperand(0).getValueType();
12973     EVT ExTy = N0.getValueType();
12974     EVT TrTy = N->getValueType(0);
12975 
12976     auto EltCnt = VecTy.getVectorElementCount();
12977     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12978     auto NewEltCnt = EltCnt * SizeRatio;
12979 
12980     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12981     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12982 
12983     SDValue EltNo = N0->getOperand(1);
12984     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12985       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12986       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12987 
12988       SDLoc DL(N);
12989       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12990                          DAG.getBitcast(NVT, N0.getOperand(0)),
12991                          DAG.getVectorIdxConstant(Index, DL));
12992     }
12993   }
12994 
12995   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12996   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12997     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12998         TLI.isTruncateFree(SrcVT, VT)) {
12999       SDLoc SL(N0);
13000       SDValue Cond = N0.getOperand(0);
13001       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13002       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
13003       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
13004     }
13005   }
13006 
  // trunc (shl x, K) -> shl (trunc x), K, provided K < VT.getScalarSizeInBits()
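  // e.g. (i32 (trunc (shl i64 x, 4))) -> (shl (i32 (trunc x)), 4): the low 32
  // bits of the wide shift depend only on the low 32 bits of x whenever the
  // shift amount is known to be below 32.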
13008   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
13009       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
13010       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
13011     SDValue Amt = N0.getOperand(1);
13012     KnownBits Known = DAG.computeKnownBits(Amt);
13013     unsigned Size = VT.getScalarSizeInBits();
13014     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
13015       SDLoc SL(N);
13016       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
13017 
13018       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13019       if (AmtVT != Amt.getValueType()) {
13020         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
13021         AddToWorklist(Amt.getNode());
13022       }
13023       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
13024     }
13025   }
13026 
13027   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
13028     return V;
13029 
13030   // Attempt to pre-truncate BUILD_VECTOR sources.
13031   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
13032       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
13033       // Avoid creating illegal types if running after type legalizer.
13034       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
13035     SDLoc DL(N);
13036     EVT SVT = VT.getScalarType();
13037     SmallVector<SDValue, 8> TruncOps;
13038     for (const SDValue &Op : N0->op_values()) {
13039       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
13040       TruncOps.push_back(TruncOp);
13041     }
13042     return DAG.getBuildVector(VT, DL, TruncOps);
13043   }
13044 
13045   // Fold a series of buildvector, bitcast, and truncate if possible.
13046   // For example fold
13047   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
13048   //   (2xi32 (buildvector x, y)).
13049   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
13050       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13051       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
13052       N0.getOperand(0).hasOneUse()) {
13053     SDValue BuildVect = N0.getOperand(0);
13054     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
13055     EVT TruncVecEltTy = VT.getVectorElementType();
13056 
13057     // Check that the element types match.
13058     if (BuildVectEltTy == TruncVecEltTy) {
13059       // Now we only need to compute the offset of the truncated elements.
13060       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
13061       unsigned TruncVecNumElts = VT.getVectorNumElements();
13062       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
13063 
13064       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
13065              "Invalid number of elements");
13066 
13067       SmallVector<SDValue, 8> Opnds;
13068       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
13069         Opnds.push_back(BuildVect.getOperand(i));
13070 
13071       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13072     }
13073   }
13074 
13075   // See if we can simplify the input to this truncate through knowledge that
13076   // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> (trunc y) when the result type
  // has no more than 8 bits, since the shl then only affects discarded bits.
13078   // Currently we only perform this optimization on scalars because vectors
13079   // may have different active low bits.
13080   if (!VT.isVector()) {
13081     APInt Mask =
13082         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
13083     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
13084       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
13085   }
13086 
13087   // fold (truncate (load x)) -> (smaller load x)
13088   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
13089   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
13090     if (SDValue Reduced = reduceLoadWidth(N))
13091       return Reduced;
13092 
13093     // Handle the case where the load remains an extending load even
13094     // after truncation.
13095     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
13096       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13097       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
13098         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
13099                                          VT, LN0->getChain(), LN0->getBasePtr(),
13100                                          LN0->getMemoryVT(),
13101                                          LN0->getMemOperand());
13102         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
13103         return NewLoad;
13104       }
13105     }
13106   }
13107 
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
  // where ... are all 'undef'.
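  // e.g. (v4i16 (trunc (concat_vectors v2i32 undef, v2i32 x)))
  //   -> (concat_vectors v2i16 undef, (v2i16 (trunc x)))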
13110   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
13111     SmallVector<EVT, 8> VTs;
13112     SDValue V;
13113     unsigned Idx = 0;
13114     unsigned NumDefs = 0;
13115 
13116     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
13117       SDValue X = N0.getOperand(i);
13118       if (!X.isUndef()) {
13119         V = X;
13120         Idx = i;
13121         NumDefs++;
13122       }
      // Stop if more than one member is non-undef.
13124       if (NumDefs > 1)
13125         break;
13126 
13127       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
13128                                      VT.getVectorElementType(),
13129                                      X.getValueType().getVectorElementCount()));
13130     }
13131 
13132     if (NumDefs == 0)
13133       return DAG.getUNDEF(VT);
13134 
13135     if (NumDefs == 1) {
13136       assert(V.getNode() && "The single defined operand is empty!");
13137       SmallVector<SDValue, 8> Opnds;
13138       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
13139         if (i != Idx) {
13140           Opnds.push_back(DAG.getUNDEF(VTs[i]));
13141           continue;
13142         }
13143         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
13144         AddToWorklist(NV.getNode());
13145         Opnds.push_back(NV);
13146       }
13147       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
13148     }
13149   }
13150 
13151   // Fold truncate of a bitcast of a vector to an extract of the low vector
13152   // element.
13153   //
13154   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
13155   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
13156     SDValue VecSrc = N0.getOperand(0);
13157     EVT VecSrcVT = VecSrc.getValueType();
13158     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
13159         (!LegalOperations ||
13160          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
13161       SDLoc SL(N);
13162 
13163       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
13164       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
13165                          DAG.getVectorIdxConstant(Idx, SL));
13166     }
13167   }
13168 
13169   // Simplify the operands using demanded-bits information.
13170   if (SimplifyDemandedBits(SDValue(N, 0)))
13171     return SDValue(N, 0);
13172 
13173   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
13174   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
13175   // When the adde's carry is not used.
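  // This is safe because the low bits of the sum do not depend on the
  // discarded high bits of the operands, and the carry-out (value #1) is
  // known to have no users.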
13176   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
13177       N0.hasOneUse() && !N0->hasAnyUseOfValue(1) &&
      // We only do this for addcarry before operation legalization.
13179       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
13180        TLI.isOperationLegal(N0.getOpcode(), VT))) {
13181     SDLoc SL(N);
13182     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13183     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13184     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
13185     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
13186   }
13187 
13188   // fold (truncate (extract_subvector(ext x))) ->
13189   //      (extract_subvector x)
13190   // TODO: This can be generalized to cover cases where the truncate and extract
13191   // do not fully cancel each other out.
13192   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
13193     SDValue N00 = N0.getOperand(0);
13194     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
13195         N00.getOpcode() == ISD::ZERO_EXTEND ||
13196         N00.getOpcode() == ISD::ANY_EXTEND) {
13197       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
13198           VT.getVectorElementType())
13199         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
13200                            N00.getOperand(0), N0.getOperand(1));
13201     }
13202   }
13203 
13204   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13205     return NewVSel;
13206 
13207   // Narrow a suitable binary operation with a non-opaque constant operand by
13208   // moving it ahead of the truncate. This is limited to pre-legalization
13209   // because targets may prefer a wider type during later combines and invert
13210   // this transform.
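  // e.g. (i16 (trunc (add i32 x, 42))) -> (add (i16 (trunc x)), (i16 42)),
  // with the truncate of the constant operand folding away.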
13211   switch (N0.getOpcode()) {
13212   case ISD::ADD:
13213   case ISD::SUB:
13214   case ISD::MUL:
13215   case ISD::AND:
13216   case ISD::OR:
13217   case ISD::XOR:
13218     if (!LegalOperations && N0.hasOneUse() &&
13219         (isConstantOrConstantVector(N0.getOperand(0), true) ||
13220          isConstantOrConstantVector(N0.getOperand(1), true))) {
13221       // TODO: We already restricted this to pre-legalization, but for vectors
13222       // we are extra cautious to not create an unsupported operation.
13223       // Target-specific changes are likely needed to avoid regressions here.
13224       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
13225         SDLoc DL(N);
13226         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
13227         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
13228         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
13229       }
13230     }
13231     break;
13232   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must also ensure that
    // we don't introduce an extra truncate.
13236     if (!LegalOperations && N0.hasOneUse() &&
13237         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13238         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
13239             VT.getScalarSizeInBits() &&
13240         hasOperation(N0.getOpcode(), VT)) {
13241       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
13242                                  DAG, SDLoc(N));
13243     }
13244     break;
13245   }
13246 
13247   return SDValue();
13248 }
13249 
13250 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13251   SDValue Elt = N->getOperand(i);
13252   if (Elt.getOpcode() != ISD::MERGE_VALUES)
13253     return Elt.getNode();
13254   return Elt.getOperand(Elt.getResNo()).getNode();
13255 }
13256 
13257 /// build_pair (load, load) -> load
13258 /// if load locations are consecutive.
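/// e.g. on a little-endian target, (i64 build_pair (i32 load [p]),
/// (i32 load [p+4])) can become a single (i64 load [p]) when the wide access
/// is legal and fast.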
13259 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13260   assert(N->getOpcode() == ISD::BUILD_PAIR);
13261 
13262   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
13263   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
13264 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
13268   if (DAG.getDataLayout().isBigEndian())
13269     std::swap(LD1, LD2);
13270 
13271   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13272       !LD1->hasOneUse() || !LD2->hasOneUse() ||
13273       LD1->getAddressSpace() != LD2->getAddressSpace())
13274     return SDValue();
13275 
13276   bool LD1Fast = false;
13277   EVT LD1VT = LD1->getValueType(0);
13278   unsigned LD1Bytes = LD1VT.getStoreSize();
13279   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13280       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
13281       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13282                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13283     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13284                        LD1->getPointerInfo(), LD1->getAlign());
13285 
13286   return SDValue();
13287 }
13288 
13289 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13290   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13291   // and Lo parts; on big-endian machines it doesn't.
13292   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13293 }
13294 
13295 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
13296                                     const TargetLowering &TLI) {
13297   // If this is not a bitcast to an FP type or if the target doesn't have
13298   // IEEE754-compliant FP logic, we're done.
13299   EVT VT = N->getValueType(0);
13300   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13301     return SDValue();
13302 
13303   // TODO: Handle cases where the integer constant is a different scalar
13304   // bitwidth to the FP.
13305   SDValue N0 = N->getOperand(0);
13306   EVT SourceVT = N0.getValueType();
13307   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13308     return SDValue();
13309 
13310   unsigned FPOpcode;
13311   APInt SignMask;
13312   switch (N0.getOpcode()) {
13313   case ISD::AND:
13314     FPOpcode = ISD::FABS;
13315     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13316     break;
13317   case ISD::XOR:
13318     FPOpcode = ISD::FNEG;
13319     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13320     break;
13321   case ISD::OR:
13322     FPOpcode = ISD::FABS;
13323     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13324     break;
13325   default:
13326     return SDValue();
13327   }
13328 
13329   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13330   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13331   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13332   //   fneg (fabs X)
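  // e.g. for f32 the masks are 0x7fffffff (and -> fabs) and 0x80000000
  // (xor -> fneg, or -> fneg (fabs)).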
13333   SDValue LogicOp0 = N0.getOperand(0);
13334   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
13335   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13336       LogicOp0.getOpcode() == ISD::BITCAST &&
13337       LogicOp0.getOperand(0).getValueType() == VT) {
13338     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13339     NumFPLogicOpsConv++;
13340     if (N0.getOpcode() == ISD::OR)
13341       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13342     return FPOp;
13343   }
13344 
13345   return SDValue();
13346 }
13347 
13348 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13349   SDValue N0 = N->getOperand(0);
13350   EVT VT = N->getValueType(0);
13351 
13352   if (N0.isUndef())
13353     return DAG.getUNDEF(VT);
13354 
13355   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13356   // Only do this before legalize types, unless both types are integer and the
13357   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
13359   // First check to see if this is all constant.
13360   // TODO: Support FP bitcasts after legalize types.
13361   if (VT.isVector() &&
13362       (!LegalTypes ||
13363        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13364         TLI.isTypeLegal(VT.getVectorElementType()))) &&
13365       N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
13366       cast<BuildVectorSDNode>(N0)->isConstant())
13367     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13368                                              VT.getVectorElementType());
13369 
13370   // If the input is a constant, let getNode fold it.
13371   if (isIntOrFPConstant(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
13375     if (!LegalOperations ||
13376         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13377          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13378         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13379          TLI.isOperationLegal(ISD::Constant, VT))) {
13380       SDValue C = DAG.getBitcast(VT, N0);
13381       if (C.getNode() != N)
13382         return C;
13383     }
13384   }
13385 
13386   // (conv (conv x, t1), t2) -> (conv x, t2)
13387   if (N0.getOpcode() == ISD::BITCAST)
13388     return DAG.getBitcast(VT, N0.getOperand(0));
13389 
13390   // fold (conv (load x)) -> (load (conv*)x)
13391   // If the resultant load doesn't need a higher alignment than the original!
13392   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13393       // Do not remove the cast if the types differ in endian layout.
13394       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13395           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13396       // If the load is volatile, we only want to change the load type if the
13397       // resulting load is legal. Otherwise we might increase the number of
13398       // memory accesses. We don't care if the original type was legal or not
13399       // as we assume software couldn't rely on the number of accesses of an
13400       // illegal type.
13401       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13402        TLI.isOperationLegal(ISD::LOAD, VT))) {
13403     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13404 
13405     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13406                                     *LN0->getMemOperand())) {
13407       SDValue Load =
13408           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13409                       LN0->getPointerInfo(), LN0->getAlign(),
13410                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13411       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13412       return Load;
13413     }
13414   }
13415 
13416   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13417     return V;
13418 
13419   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13420   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13421   //
13422   // For ppc_fp128:
13423   // fold (bitcast (fneg x)) ->
13424   //     flipbit = signbit
13425   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13426   //
13427   // fold (bitcast (fabs x)) ->
13428   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
13429   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13430   // This often reduces constant pool loads.
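  // e.g. for f64: (i64 (bitcast (fneg X))) -> (xor (i64 (bitcast X)),
  // 0x8000000000000000), which flips only the sign bit.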
13431   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13432        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13433       N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
13434       !N0.getValueType().isVector()) {
13435     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13436     AddToWorklist(NewConv.getNode());
13437 
13438     SDLoc DL(N);
13439     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13440       assert(VT.getSizeInBits() == 128);
13441       SDValue SignBit = DAG.getConstant(
13442           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13443       SDValue FlipBit;
13444       if (N0.getOpcode() == ISD::FNEG) {
13445         FlipBit = SignBit;
13446         AddToWorklist(FlipBit.getNode());
13447       } else {
13448         assert(N0.getOpcode() == ISD::FABS);
13449         SDValue Hi =
13450             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13451                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13452                                               SDLoc(NewConv)));
13453         AddToWorklist(Hi.getNode());
13454         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13455         AddToWorklist(FlipBit.getNode());
13456       }
13457       SDValue FlipBits =
13458           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13459       AddToWorklist(FlipBits.getNode());
13460       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13461     }
13462     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13463     if (N0.getOpcode() == ISD::FNEG)
13464       return DAG.getNode(ISD::XOR, DL, VT,
13465                          NewConv, DAG.getConstant(SignBit, DL, VT));
13466     assert(N0.getOpcode() == ISD::FABS);
13467     return DAG.getNode(ISD::AND, DL, VT,
13468                        NewConv, DAG.getConstant(~SignBit, DL, VT));
13469   }
13470 
13471   // fold (bitconvert (fcopysign cst, x)) ->
13472   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
13473   // Note that we don't handle (copysign x, cst) because this can always be
13474   // folded to an fneg or fabs.
13475   //
13476   // For ppc_fp128:
13477   // fold (bitcast (fcopysign cst, x)) ->
13478   //     flipbit = (and (extract_element
13479   //                     (xor (bitcast cst), (bitcast x)), 0),
13480   //                    signbit)
13481   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
13482   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
13483       isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
13484       !VT.isVector()) {
13485     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13486     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13487     if (isTypeLegal(IntXVT)) {
13488       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13489       AddToWorklist(X.getNode());
13490 
13491       // If X has a different width than the result/lhs, sext it or truncate it.
13492       unsigned VTWidth = VT.getSizeInBits();
13493       if (OrigXWidth < VTWidth) {
13494         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13495         AddToWorklist(X.getNode());
13496       } else if (OrigXWidth > VTWidth) {
13497         // To get the sign bit in the right place, we have to shift it right
13498         // before truncating.
13499         SDLoc DL(X);
13500         X = DAG.getNode(ISD::SRL, DL,
13501                         X.getValueType(), X,
13502                         DAG.getConstant(OrigXWidth-VTWidth, DL,
13503                                         X.getValueType()));
13504         AddToWorklist(X.getNode());
13505         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13506         AddToWorklist(X.getNode());
13507       }
13508 
13509       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13510         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13511         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13512         AddToWorklist(Cst.getNode());
13513         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13514         AddToWorklist(X.getNode());
13515         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13516         AddToWorklist(XorResult.getNode());
13517         SDValue XorResult64 = DAG.getNode(
13518             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13519             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13520                                   SDLoc(XorResult)));
13521         AddToWorklist(XorResult64.getNode());
13522         SDValue FlipBit =
13523             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13524                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13525         AddToWorklist(FlipBit.getNode());
13526         SDValue FlipBits =
13527             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13528         AddToWorklist(FlipBits.getNode());
13529         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13530       }
13531       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13532       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13533                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
13534       AddToWorklist(X.getNode());
13535 
13536       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13537       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13538                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13539       AddToWorklist(Cst.getNode());
13540 
13541       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13542     }
13543   }
13544 
13545   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13546   if (N0.getOpcode() == ISD::BUILD_PAIR)
13547     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13548       return CombineLD;
13549 
13550   // Remove double bitcasts from shuffles - this is often a legacy of
13551   // XformToShuffleWithZero being used to combine bitmaskings (of
13552   // float vectors bitcast to integer vectors) into shuffles.
13553   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13554   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13555       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13556       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13557       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13558     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13559 
    // If an operand is a bitcast, peek through it if it casts from the
    // original VT. If an operand is a constant, just bitcast back to the
    // original VT.
13562     auto PeekThroughBitcast = [&](SDValue Op) {
13563       if (Op.getOpcode() == ISD::BITCAST &&
13564           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
13566       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
13567           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
13568         return DAG.getBitcast(VT, Op);
13569       return SDValue();
13570     };
13571 
13572     // FIXME: If either input vector is bitcast, try to convert the shuffle to
13573     // the result type of this bitcast. This would eliminate at least one
13574     // bitcast. See the transform in InstCombine.
13575     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13576     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13577     if (!(SV0 && SV1))
13578       return SDValue();
13579 
13580     int MaskScale =
13581         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13582     SmallVector<int, 8> NewMask;
13583     for (int M : SVN->getMask())
13584       for (int i = 0; i != MaskScale; ++i)
13585         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13586 
13587     SDValue LegalShuffle =
13588         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13589     if (LegalShuffle)
13590       return LegalShuffle;
13591   }
13592 
13593   return SDValue();
13594 }
13595 
13596 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13597   EVT VT = N->getValueType(0);
13598   return CombineConsecutiveLoads(N, VT);
13599 }
13600 
13601 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13602   SDValue N0 = N->getOperand(0);
13603 
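  // A freeze of a value that can never be undef or poison is a no-op, so the
  // operand can be used directly.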
13604   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13605     return N0;
13606 
13607   return SDValue();
13608 }
13609 
13610 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13611 /// operands. DstEltVT indicates the destination element value type.
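/// e.g. bitcasting (v2i64 build_vector C0, C1) to v4i32 yields a build_vector
/// of the four constituent 32-bit constants, with the element order chosen
/// according to the target's endianness.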
13612 SDValue DAGCombiner::
13613 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13614   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13615 
13616   // If this is already the right type, we're done.
13617   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13618 
13619   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13620   unsigned DstBitSize = DstEltVT.getSizeInBits();
13621 
13622   // If this is a conversion of N elements of one type to N elements of another
13623   // type, convert each element.  This handles FP<->INT cases.
13624   if (SrcBitSize == DstBitSize) {
13625     SmallVector<SDValue, 8> Ops;
13626     for (SDValue Op : BV->op_values()) {
13627       // If the vector element type is not legal, the BUILD_VECTOR operands
13628       // are promoted and implicitly truncated.  Make that explicit here.
13629       if (Op.getValueType() != SrcEltVT)
13630         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13631       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13632       AddToWorklist(Ops.back().getNode());
13633     }
13634     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13635                               BV->getValueType(0).getVectorNumElements());
13636     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13637   }
13638 
13639   // Otherwise, we're growing or shrinking the elements.  To avoid having to
13640   // handle annoying details of growing/shrinking FP values, we convert them to
13641   // int first.
13642   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector whose elements have the
    // same size.
13645     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13646     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13647     SrcEltVT = IntVT;
13648   }
13649 
  // Now we know the input is an integer vector.  If the output is an FP type,
13651   // convert to integer first, then to FP of the right size.
13652   if (DstEltVT.isFloatingPoint()) {
13653     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13654     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13655 
13656     // Next, convert to FP elements of the same size.
13657     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13658   }
13659 
13660   // Okay, we know the src/dst types are both integers of differing types.
13661   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13662 
13663   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13664   // BuildVectorSDNode?
13665   auto *BVN = cast<BuildVectorSDNode>(BV);
13666 
13667   // Extract the constant raw bit data.
13668   BitVector UndefElements;
13669   SmallVector<APInt> RawBits;
13670   bool IsLE = DAG.getDataLayout().isLittleEndian();
13671   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13672     return SDValue();
13673 
13674   SDLoc DL(BV);
13675   SmallVector<SDValue, 8> Ops;
13676   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13677     if (UndefElements[I])
13678       Ops.push_back(DAG.getUNDEF(DstEltVT));
13679     else
13680       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13681   }
13682 
13683   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13684   return DAG.getBuildVector(VT, DL, Ops);
13685 }
13686 
// Returns true if floating-point contraction is allowed on the FMUL-SDValue
// `N`.
13689 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13690   assert(N.getOpcode() == ISD::FMUL);
13691 
13692   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13693          N->getFlags().hasAllowContract();
13694 }
13695 
// Returns true if `N` may assume that no infinities are involved in its
// computation.
13697 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13698   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
13699 }
13700 
13701 /// Try to perform FMA combining on a given FADD node.
13702 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13703   SDValue N0 = N->getOperand(0);
13704   SDValue N1 = N->getOperand(1);
13705   EVT VT = N->getValueType(0);
13706   SDLoc SL(N);
13707 
13708   const TargetOptions &Options = DAG.getTarget().Options;
13709 
13710   // Floating-point multiply-add with intermediate rounding.
13711   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13712 
13713   // Floating-point multiply-add without intermediate rounding.
13714   bool HasFMA =
13715       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13716       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13717 
13718   // No valid opcode, do not combine.
13719   if (!HasFMAD && !HasFMA)
13720     return SDValue();
13721 
13722   bool CanReassociate =
13723       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13724   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13725                               Options.UnsafeFPMath || HasFMAD);
13726   // If the addition is not contractable, do not combine.
13727   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13728     return SDValue();
13729 
13730   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13731     return SDValue();
13732 
13733   // Always prefer FMAD to FMA for precision.
13734   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13735   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13736 
13737   auto isFusedOp = [&](SDValue N) {
13738     unsigned Opcode = N.getOpcode();
13739     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13740   };
13741 
13742   // Is the node an FMUL and contractable either due to global flags or
13743   // SDNodeFlags.
13744   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13745     if (N.getOpcode() != ISD::FMUL)
13746       return false;
13747     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13748   };
13749   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13750   // prefer to fold the multiply with fewer uses.
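  // Fusing the multiply with fewer uses makes it more likely that the FMUL
  // node itself becomes dead after the combine, rather than being kept alive
  // by its other users.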
13751   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13752     if (N0->use_size() > N1->use_size())
13753       std::swap(N0, N1);
13754   }
13755 
13756   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13757   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13758     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13759                        N0.getOperand(1), N1);
13760   }
13761 
13762   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13763   // Note: Commutes FADD operands.
13764   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13765     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13766                        N1.getOperand(1), N0);
13767   }
13768 
13769   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13770   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13771   // This requires reassociation because it changes the order of operations.
13772   SDValue FMA, E;
13773   if (CanReassociate && isFusedOp(N0) &&
13774       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13775       N0.getOperand(2).hasOneUse()) {
13776     FMA = N0;
13777     E = N1;
13778   } else if (CanReassociate && isFusedOp(N1) &&
13779              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13780              N1.getOperand(2).hasOneUse()) {
13781     FMA = N1;
13782     E = N0;
13783   }
13784   if (FMA && E) {
13785     SDValue A = FMA.getOperand(0);
13786     SDValue B = FMA.getOperand(1);
13787     SDValue C = FMA.getOperand(2).getOperand(0);
13788     SDValue D = FMA.getOperand(2).getOperand(1);
13789     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13790     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13791   }
13792 
13793   // Look through FP_EXTEND nodes to do more combining.
13794 
13795   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13796   if (N0.getOpcode() == ISD::FP_EXTEND) {
13797     SDValue N00 = N0.getOperand(0);
13798     if (isContractableFMUL(N00) &&
13799         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13800                             N00.getValueType())) {
13801       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13802                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13803                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13804                          N1);
13805     }
13806   }
13807 
13808   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13809   // Note: Commutes FADD operands.
13810   if (N1.getOpcode() == ISD::FP_EXTEND) {
13811     SDValue N10 = N1.getOperand(0);
13812     if (isContractableFMUL(N10) &&
13813         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13814                             N10.getValueType())) {
13815       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13816                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13817                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13818                          N0);
13819     }
13820   }
13821 
13822   // More folding opportunities when target permits.
13823   if (Aggressive) {
13824     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
13825     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
13826     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13827                                     SDValue Z) {
13828       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
13829                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13830                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13831                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
13832                                      Z));
13833     };
13834     if (isFusedOp(N0)) {
13835       SDValue N02 = N0.getOperand(2);
13836       if (N02.getOpcode() == ISD::FP_EXTEND) {
13837         SDValue N020 = N02.getOperand(0);
13838         if (isContractableFMUL(N020) &&
13839             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13840                                 N020.getValueType())) {
13841           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
13842                                       N020.getOperand(0), N020.getOperand(1),
13843                                       N1);
13844         }
13845       }
13846     }
13847 
13848     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
13849     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
13850     // FIXME: This turns two single-precision and one double-precision
13851     // operation into two double-precision operations, which might not be
13852     // interesting for all targets, especially GPUs.
13853     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
13854                                     SDValue Z) {
13855       return DAG.getNode(
13856           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
13857           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
13858           DAG.getNode(PreferredFusedOpcode, SL, VT,
13859                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
13860                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
13861     };
13862     if (N0.getOpcode() == ISD::FP_EXTEND) {
13863       SDValue N00 = N0.getOperand(0);
13864       if (isFusedOp(N00)) {
13865         SDValue N002 = N00.getOperand(2);
13866         if (isContractableFMUL(N002) &&
13867             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13868                                 N00.getValueType())) {
13869           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
13870                                       N002.getOperand(0), N002.getOperand(1),
13871                                       N1);
13872         }
13873       }
13874     }
13875 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
13877     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13878     if (isFusedOp(N1)) {
13879       SDValue N12 = N1.getOperand(2);
13880       if (N12.getOpcode() == ISD::FP_EXTEND) {
13881         SDValue N120 = N12.getOperand(0);
13882         if (isContractableFMUL(N120) &&
13883             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13884                                 N120.getValueType())) {
13885           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13886                                       N120.getOperand(0), N120.getOperand(1),
13887                                       N0);
13888         }
13889       }
13890     }
13891 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13893     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13894     // FIXME: This turns two single-precision and one double-precision
13895     // operation into two double-precision operations, which might not be
13896     // interesting for all targets, especially GPUs.
13897     if (N1.getOpcode() == ISD::FP_EXTEND) {
13898       SDValue N10 = N1.getOperand(0);
13899       if (isFusedOp(N10)) {
13900         SDValue N102 = N10.getOperand(2);
13901         if (isContractableFMUL(N102) &&
13902             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13903                                 N10.getValueType())) {
13904           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13905                                       N102.getOperand(0), N102.getOperand(1),
13906                                       N0);
13907         }
13908       }
13909     }
13910   }
13911 
13912   return SDValue();
13913 }
13914 
13915 /// Try to perform FMA combining on a given FSUB node.
13916 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13917   SDValue N0 = N->getOperand(0);
13918   SDValue N1 = N->getOperand(1);
13919   EVT VT = N->getValueType(0);
13920   SDLoc SL(N);
13921 
13922   const TargetOptions &Options = DAG.getTarget().Options;
13923   // Floating-point multiply-add with intermediate rounding.
13924   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13925 
13926   // Floating-point multiply-add without intermediate rounding.
13927   bool HasFMA =
13928       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13929       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13930 
13931   // No valid opcode, do not combine.
13932   if (!HasFMAD && !HasFMA)
13933     return SDValue();
13934 
13935   const SDNodeFlags Flags = N->getFlags();
13936   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13937                               Options.UnsafeFPMath || HasFMAD);
13938 
13939   // If the subtraction is not contractable, do not combine.
13940   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13941     return SDValue();
13942 
13943   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13944     return SDValue();
13945 
13946   // Always prefer FMAD to FMA for precision.
13947   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13948   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13949   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13950 
13951   // Is the node an FMUL and contractable either due to global flags or
13952   // SDNodeFlags.
13953   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13954     if (N.getOpcode() != ISD::FMUL)
13955       return false;
13956     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13957   };
13958 
13959   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13960   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13961     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13962       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13963                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13964     }
13965     return SDValue();
13966   };
13967 
13968   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13969   // Note: Commutes FSUB operands.
13970   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13971     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13972       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13973                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13974                          YZ.getOperand(1), X);
13975     }
13976     return SDValue();
13977   };
13978 
13979   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13980   // prefer to fold the multiply with fewer uses.
13981   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13982       (N0->use_size() > N1->use_size())) {
13983     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13984     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13985       return V;
13986     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13987     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13988       return V;
13989   } else {
13990     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13991     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13992       return V;
13993     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13994     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13995       return V;
13996   }
13997 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13999   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
14000       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
14001     SDValue N00 = N0.getOperand(0).getOperand(0);
14002     SDValue N01 = N0.getOperand(0).getOperand(1);
14003     return DAG.getNode(PreferredFusedOpcode, SL, VT,
14004                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
14005                        DAG.getNode(ISD::FNEG, SL, VT, N1));
14006   }
14007 
14008   // Look through FP_EXTEND nodes to do more combining.
14009 
14010   // fold (fsub (fpext (fmul x, y)), z)
14011   //   -> (fma (fpext x), (fpext y), (fneg z))
14012   if (N0.getOpcode() == ISD::FP_EXTEND) {
14013     SDValue N00 = N0.getOperand(0);
14014     if (isContractableFMUL(N00) &&
14015         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14016                             N00.getValueType())) {
14017       return DAG.getNode(PreferredFusedOpcode, SL, VT,
14018                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14019                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14020                          DAG.getNode(ISD::FNEG, SL, VT, N1));
14021     }
14022   }
14023 
14024   // fold (fsub x, (fpext (fmul y, z)))
14025   //   -> (fma (fneg (fpext y)), (fpext z), x)
14026   // Note: Commutes FSUB operands.
14027   if (N1.getOpcode() == ISD::FP_EXTEND) {
14028     SDValue N10 = N1.getOperand(0);
14029     if (isContractableFMUL(N10) &&
14030         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14031                             N10.getValueType())) {
14032       return DAG.getNode(
14033           PreferredFusedOpcode, SL, VT,
14034           DAG.getNode(ISD::FNEG, SL, VT,
14035                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
14036           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
14037     }
14038   }
14039 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
14046   if (N0.getOpcode() == ISD::FP_EXTEND) {
14047     SDValue N00 = N0.getOperand(0);
14048     if (N00.getOpcode() == ISD::FNEG) {
14049       SDValue N000 = N00.getOperand(0);
14050       if (isContractableFMUL(N000) &&
14051           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14052                               N00.getValueType())) {
14053         return DAG.getNode(
14054             ISD::FNEG, SL, VT,
14055             DAG.getNode(PreferredFusedOpcode, SL, VT,
14056                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14057                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14058                         N1));
14059       }
14060     }
14061   }
14062 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
14069   if (N0.getOpcode() == ISD::FNEG) {
14070     SDValue N00 = N0.getOperand(0);
14071     if (N00.getOpcode() == ISD::FP_EXTEND) {
14072       SDValue N000 = N00.getOperand(0);
14073       if (isContractableFMUL(N000) &&
14074           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14075                               N000.getValueType())) {
14076         return DAG.getNode(
14077             ISD::FNEG, SL, VT,
14078             DAG.getNode(PreferredFusedOpcode, SL, VT,
14079                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14080                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14081                         N1));
14082       }
14083     }
14084   }
14085 
14086   auto isReassociable = [Options](SDNode *N) {
14087     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14088   };
14089 
14090   auto isContractableAndReassociableFMUL = [isContractableFMUL,
14091                                             isReassociable](SDValue N) {
14092     return isContractableFMUL(N) && isReassociable(N.getNode());
14093   };
14094 
14095   auto isFusedOp = [&](SDValue N) {
14096     unsigned Opcode = N.getOpcode();
14097     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
14098   };
14099 
14100   // More folding opportunities when target permits.
14101   if (Aggressive && isReassociable(N)) {
14102     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
14103     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
14105     if (CanFuse && isFusedOp(N0) &&
14106         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
14107         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
14108       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14109                          N0.getOperand(1),
14110                          DAG.getNode(PreferredFusedOpcode, SL, VT,
14111                                      N0.getOperand(2).getOperand(0),
14112                                      N0.getOperand(2).getOperand(1),
14113                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
14114     }
14115 
14116     // fold (fsub x, (fma y, z, (fmul u, v)))
14117     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
14118     if (CanFuse && isFusedOp(N1) &&
14119         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
14120         N1->hasOneUse() && NoSignedZero) {
14121       SDValue N20 = N1.getOperand(2).getOperand(0);
14122       SDValue N21 = N1.getOperand(2).getOperand(1);
14123       return DAG.getNode(
14124           PreferredFusedOpcode, SL, VT,
14125           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14126           DAG.getNode(PreferredFusedOpcode, SL, VT,
14127                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
14128     }
14129 
14130     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
14132     if (isFusedOp(N0) && N0->hasOneUse()) {
14133       SDValue N02 = N0.getOperand(2);
14134       if (N02.getOpcode() == ISD::FP_EXTEND) {
14135         SDValue N020 = N02.getOperand(0);
14136         if (isContractableAndReassociableFMUL(N020) &&
14137             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14138                                 N020.getValueType())) {
14139           return DAG.getNode(
14140               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
14141               DAG.getNode(
14142                   PreferredFusedOpcode, SL, VT,
14143                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
14144                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
14145                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
14146         }
14147       }
14148     }
14149 
14150     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
14151     //   -> (fma (fpext x), (fpext y),
14152     //           (fma (fpext u), (fpext v), (fneg z)))
14153     // FIXME: This turns two single-precision and one double-precision
14154     // operation into two double-precision operations, which might not be
14155     // interesting for all targets, especially GPUs.
14156     if (N0.getOpcode() == ISD::FP_EXTEND) {
14157       SDValue N00 = N0.getOperand(0);
14158       if (isFusedOp(N00)) {
14159         SDValue N002 = N00.getOperand(2);
14160         if (isContractableAndReassociableFMUL(N002) &&
14161             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14162                                 N00.getValueType())) {
14163           return DAG.getNode(
14164               PreferredFusedOpcode, SL, VT,
14165               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14166               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14167               DAG.getNode(
14168                   PreferredFusedOpcode, SL, VT,
14169                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
14170                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
14171                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
14172         }
14173       }
14174     }
14175 
14176     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
14177     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
14178     if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
14179         N1->hasOneUse()) {
14180       SDValue N120 = N1.getOperand(2).getOperand(0);
14181       if (isContractableAndReassociableFMUL(N120) &&
14182           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14183                               N120.getValueType())) {
14184         SDValue N1200 = N120.getOperand(0);
14185         SDValue N1201 = N120.getOperand(1);
14186         return DAG.getNode(
14187             PreferredFusedOpcode, SL, VT,
14188             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14189             DAG.getNode(PreferredFusedOpcode, SL, VT,
14190                         DAG.getNode(ISD::FNEG, SL, VT,
14191                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
14192                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
14193       }
14194     }
14195 
14196     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
14197     //   -> (fma (fneg (fpext y)), (fpext z),
14198     //           (fma (fneg (fpext u)), (fpext v), x))
14199     // FIXME: This turns two single-precision and one double-precision
14200     // operation into two double-precision operations, which might not be
14201     // interesting for all targets, especially GPUs.
14202     if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
14203       SDValue CvtSrc = N1.getOperand(0);
14204       SDValue N100 = CvtSrc.getOperand(0);
14205       SDValue N101 = CvtSrc.getOperand(1);
14206       SDValue N102 = CvtSrc.getOperand(2);
14207       if (isContractableAndReassociableFMUL(N102) &&
14208           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14209                               CvtSrc.getValueType())) {
14210         SDValue N1020 = N102.getOperand(0);
14211         SDValue N1021 = N102.getOperand(1);
14212         return DAG.getNode(
14213             PreferredFusedOpcode, SL, VT,
14214             DAG.getNode(ISD::FNEG, SL, VT,
14215                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
14216             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
14217             DAG.getNode(PreferredFusedOpcode, SL, VT,
14218                         DAG.getNode(ISD::FNEG, SL, VT,
14219                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
14220                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
14221       }
14222     }
14223   }
14224 
14225   return SDValue();
14226 }
14227 
14228 /// Try to perform FMA combining on a given FMUL node based on the distributive
14229 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
14230 /// subtraction instead of addition).
14231 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
14232   SDValue N0 = N->getOperand(0);
14233   SDValue N1 = N->getOperand(1);
14234   EVT VT = N->getValueType(0);
14235   SDLoc SL(N);
14236 
14237   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
14238 
14239   const TargetOptions &Options = DAG.getTarget().Options;
14240 
14241   // The transforms below are incorrect when x == 0 and y == inf, because the
14242   // intermediate multiplication produces a nan.
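  // For example, with x == 0 and y == inf:
  //   (fmul (fadd 0, 1.0), inf) == inf, but (fma 0, inf, inf) == nan.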
14243   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
14244   if (!hasNoInfs(Options, FAdd))
14245     return SDValue();
14246 
14247   // Floating-point multiply-add without intermediate rounding.
14248   bool HasFMA =
14249       isContractableFMUL(Options, SDValue(N, 0)) &&
14250       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14251       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14252 
14253   // Floating-point multiply-add with intermediate rounding. This can result
14254   // in a less precise result due to the changed rounding order.
14255   bool HasFMAD = Options.UnsafeFPMath &&
14256                  (LegalOperations && TLI.isFMADLegal(DAG, N));
14257 
14258   // No valid opcode, do not combine.
14259   if (!HasFMAD && !HasFMA)
14260     return SDValue();
14261 
14262   // Always prefer FMAD to FMA for precision.
14263   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14264   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14265 
14266   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14267   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
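  // (These rely on (x0 + 1.0) * y == x0*y + y and
  //  (x0 - 1.0) * y == x0*y - y.)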
14268   auto FuseFADD = [&](SDValue X, SDValue Y) {
14269     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
14270       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14271         if (C->isExactlyValue(+1.0))
14272           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14273                              Y);
14274         if (C->isExactlyValue(-1.0))
14275           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14276                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14277       }
14278     }
14279     return SDValue();
14280   };
14281 
14282   if (SDValue FMA = FuseFADD(N0, N1))
14283     return FMA;
14284   if (SDValue FMA = FuseFADD(N1, N0))
14285     return FMA;
14286 
14287   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14288   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14289   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14290   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
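  // (For example, the first case uses (1.0 - x1) * y == y - x1*y, which is
  //  exactly (fma (fneg x1), y, y).)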
14291   auto FuseFSUB = [&](SDValue X, SDValue Y) {
14292     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14293       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14294         if (C0->isExactlyValue(+1.0))
14295           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14296                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14297                              Y);
14298         if (C0->isExactlyValue(-1.0))
14299           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14300                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14301                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14302       }
14303       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14304         if (C1->isExactlyValue(+1.0))
14305           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14306                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14307         if (C1->isExactlyValue(-1.0))
14308           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14309                              Y);
14310       }
14311     }
14312     return SDValue();
14313   };
14314 
14315   if (SDValue FMA = FuseFSUB(N0, N1))
14316     return FMA;
14317   if (SDValue FMA = FuseFSUB(N1, N0))
14318     return FMA;
14319 
14320   return SDValue();
14321 }
14322 
14323 SDValue DAGCombiner::visitFADD(SDNode *N) {
14324   SDValue N0 = N->getOperand(0);
14325   SDValue N1 = N->getOperand(1);
14326   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14327   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14328   EVT VT = N->getValueType(0);
14329   SDLoc DL(N);
14330   const TargetOptions &Options = DAG.getTarget().Options;
14331   SDNodeFlags Flags = N->getFlags();
14332   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14333 
14334   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14335     return R;
14336 
14337   // fold (fadd c1, c2) -> c1 + c2
14338   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14339     return C;
14340 
14341   // canonicalize constant to RHS
14342   if (N0CFP && !N1CFP)
14343     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14344 
14345   // fold vector ops
14346   if (VT.isVector())
14347     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14348       return FoldedVOp;
14349 
14350   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14351   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
14352   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
14354       return N0;
14355 
14356   if (SDValue NewSel = foldBinOpIntoSelect(N))
14357     return NewSel;
14358 
14359   // fold (fadd A, (fneg B)) -> (fsub A, B)
14360   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14361     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14362             N1, DAG, LegalOperations, ForCodeSize))
14363       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14364 
14365   // fold (fadd (fneg A), B) -> (fsub B, A)
14366   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14367     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14368             N0, DAG, LegalOperations, ForCodeSize))
14369       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
14370 
14371   auto isFMulNegTwo = [](SDValue FMul) {
14372     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14373       return false;
14374     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14375     return C && C->isExactlyValue(-2.0);
14376   };
14377 
14378   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
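  // (This uses the identity A + B * -2.0 == A - (B + B).)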
14379   if (isFMulNegTwo(N0)) {
14380     SDValue B = N0.getOperand(0);
14381     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14382     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14383   }
14384   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14385   if (isFMulNegTwo(N1)) {
14386     SDValue B = N1.getOperand(0);
14387     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14388     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14389   }
14390 
  // No FP constant should be created after legalization as the Instruction
  // Selection pass has a hard time dealing with FP constants.
14393   bool AllowNewConst = (Level < AfterLegalizeDAG);
14394 
14395   // If nnan is enabled, fold lots of things.
14396   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14397     // If allowed, fold (fadd (fneg x), x) -> 0.0
14398     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14399       return DAG.getConstantFP(0.0, DL, VT);
14400 
14401     // If allowed, fold (fadd x, (fneg x)) -> 0.0
14402     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14403       return DAG.getConstantFP(0.0, DL, VT);
14404   }
14405 
14406   // If 'unsafe math' or reassoc and nsz, fold lots of things.
14407   // TODO: break out portions of the transformations below for which Unsafe is
14408   //       considered and which do not require both nsz and reassoc
14409   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14410        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14411       AllowNewConst) {
14412     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14413     if (N1CFP && N0.getOpcode() == ISD::FADD &&
14414         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14415       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14416       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14417     }
14418 
14419     // We can fold chains of FADD's of the same value into multiplications.
14420     // This transform is not safe in general because we are reducing the number
14421     // of rounding steps.
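    // For example, (x + x) + x rounds twice, while the equivalent x * 3.0
    // rounds once, so the results may differ in the last bit.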
14422     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14423       if (N0.getOpcode() == ISD::FMUL) {
14424         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14425         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14426 
14427         // (fadd (fmul x, c), x) -> (fmul x, c+1)
14428         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14429           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14430                                        DAG.getConstantFP(1.0, DL, VT));
14431           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14432         }
14433 
14434         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14435         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14436             N1.getOperand(0) == N1.getOperand(1) &&
14437             N0.getOperand(0) == N1.getOperand(0)) {
14438           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14439                                        DAG.getConstantFP(2.0, DL, VT));
14440           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14441         }
14442       }
14443 
14444       if (N1.getOpcode() == ISD::FMUL) {
14445         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14446         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14447 
14448         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14449         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14450           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14451                                        DAG.getConstantFP(1.0, DL, VT));
14452           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14453         }
14454 
14455         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14456         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14457             N0.getOperand(0) == N0.getOperand(1) &&
14458             N1.getOperand(0) == N0.getOperand(0)) {
14459           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14460                                        DAG.getConstantFP(2.0, DL, VT));
14461           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14462         }
14463       }
14464 
14465       if (N0.getOpcode() == ISD::FADD) {
14466         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14467         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14468         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14469             (N0.getOperand(0) == N1)) {
14470           return DAG.getNode(ISD::FMUL, DL, VT, N1,
14471                              DAG.getConstantFP(3.0, DL, VT));
14472         }
14473       }
14474 
14475       if (N1.getOpcode() == ISD::FADD) {
14476         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14477         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14478         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14479             N1.getOperand(0) == N0) {
14480           return DAG.getNode(ISD::FMUL, DL, VT, N0,
14481                              DAG.getConstantFP(3.0, DL, VT));
14482         }
14483       }
14484 
14485       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14486       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14487           N0.getOperand(0) == N0.getOperand(1) &&
14488           N1.getOperand(0) == N1.getOperand(1) &&
14489           N0.getOperand(0) == N1.getOperand(0)) {
14490         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14491                            DAG.getConstantFP(4.0, DL, VT));
14492       }
14493     }
14494   } // enable-unsafe-fp-math
14495 
14496   // FADD -> FMA combines:
14497   if (SDValue Fused = visitFADDForFMACombine(N)) {
14498     AddToWorklist(Fused.getNode());
14499     return Fused;
14500   }
14501   return SDValue();
14502 }
14503 
14504 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14505   SDValue Chain = N->getOperand(0);
14506   SDValue N0 = N->getOperand(1);
14507   SDValue N1 = N->getOperand(2);
14508   EVT VT = N->getValueType(0);
14509   EVT ChainVT = N->getValueType(1);
14510   SDLoc DL(N);
14511   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14512 
14513   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14514   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14515     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14516             N1, DAG, LegalOperations, ForCodeSize)) {
14517       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14518                          {Chain, N0, NegN1});
14519     }
14520 
14521   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14522   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14523     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14524             N0, DAG, LegalOperations, ForCodeSize)) {
14525       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14526                          {Chain, N1, NegN0});
14527     }
14528   return SDValue();
14529 }
14530 
14531 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14532   SDValue N0 = N->getOperand(0);
14533   SDValue N1 = N->getOperand(1);
14534   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14535   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14536   EVT VT = N->getValueType(0);
14537   SDLoc DL(N);
14538   const TargetOptions &Options = DAG.getTarget().Options;
14539   const SDNodeFlags Flags = N->getFlags();
14540   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14541 
14542   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14543     return R;
14544 
14545   // fold (fsub c1, c2) -> c1-c2
14546   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14547     return C;
14548 
14549   // fold vector ops
14550   if (VT.isVector())
14551     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14552       return FoldedVOp;
14553 
14554   if (SDValue NewSel = foldBinOpIntoSelect(N))
14555     return NewSel;
14556 
14557   // (fsub A, 0) -> A
14558   if (N1CFP && N1CFP->isZero()) {
14559     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14560         Flags.hasNoSignedZeros()) {
14561       return N0;
14562     }
14563   }
14564 
14565   if (N0 == N1) {
14566     // (fsub x, x) -> 0.0
14567     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14568       return DAG.getConstantFP(0.0f, DL, VT);
14569   }
14570 
14571   // (fsub -0.0, N1) -> -N1
14572   if (N0CFP && N0CFP->isZero()) {
14573     if (N0CFP->isNegative() ||
14574         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14575       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14576       // flushed to zero, unless all users treat denorms as zero (DAZ).
14577       // FIXME: This transform will change the sign of a NaN and the behavior
14578       // of a signaling NaN. It is only valid when a NoNaN flag is present.
14579       DenormalMode DenormMode = DAG.getDenormalMode(VT);
14580       if (DenormMode == DenormalMode::getIEEE()) {
14581         if (SDValue NegN1 =
14582                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14583           return NegN1;
14584         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14585           return DAG.getNode(ISD::FNEG, DL, VT, N1);
14586       }
14587     }
14588   }
14589 
14590   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14591        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14592       N1.getOpcode() == ISD::FADD) {
14593     // X - (X + Y) -> -Y
14594     if (N0 == N1->getOperand(0))
14595       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14596     // X - (Y + X) -> -Y
14597     if (N0 == N1->getOperand(1))
14598       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14599   }
14600 
14601   // fold (fsub A, (fneg B)) -> (fadd A, B)
14602   if (SDValue NegN1 =
14603           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14604     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14605 
14606   // FSUB -> FMA combines:
14607   if (SDValue Fused = visitFSUBForFMACombine(N)) {
14608     AddToWorklist(Fused.getNode());
14609     return Fused;
14610   }
14611 
14612   return SDValue();
14613 }
14614 
14615 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14616   SDValue N0 = N->getOperand(0);
14617   SDValue N1 = N->getOperand(1);
14618   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14619   EVT VT = N->getValueType(0);
14620   SDLoc DL(N);
14621   const TargetOptions &Options = DAG.getTarget().Options;
14622   const SDNodeFlags Flags = N->getFlags();
14623   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14624 
14625   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14626     return R;
14627 
14628   // fold (fmul c1, c2) -> c1*c2
14629   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14630     return C;
14631 
14632   // canonicalize constant to RHS
14633   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14634      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14635     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14636 
14637   // fold vector ops
14638   if (VT.isVector())
14639     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14640       return FoldedVOp;
14641 
14642   if (SDValue NewSel = foldBinOpIntoSelect(N))
14643     return NewSel;
14644 
14645   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14646     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14647     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14648         N0.getOpcode() == ISD::FMUL) {
14649       SDValue N00 = N0.getOperand(0);
14650       SDValue N01 = N0.getOperand(1);
14651       // Avoid an infinite loop by making sure that N00 is not a constant
14652       // (the inner multiply has not been constant folded yet).
14653       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14654           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14655         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14656         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14657       }
14658     }
14659 
    // Match a special case: we convert X * 2.0 into fadd.
14661     // fmul (fadd X, X), C -> fmul X, 2.0 * C
14662     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14663         N0.getOperand(0) == N0.getOperand(1)) {
14664       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14665       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14666       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14667     }
14668   }
14669 
14670   // fold (fmul X, 2.0) -> (fadd X, X)
14671   if (N1CFP && N1CFP->isExactlyValue(+2.0))
14672     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14673 
14674   // fold (fmul X, -1.0) -> (fsub -0.0, X)
14675   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14676     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14677       return DAG.getNode(ISD::FSUB, DL, VT,
14678                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14679     }
14680   }
14681 
14682   // -N0 * -N1 --> N0 * N1
14683   TargetLowering::NegatibleCost CostN0 =
14684       TargetLowering::NegatibleCost::Expensive;
14685   TargetLowering::NegatibleCost CostN1 =
14686       TargetLowering::NegatibleCost::Expensive;
14687   SDValue NegN0 =
14688       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14689   SDValue NegN1 =
14690       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14691   if (NegN0 && NegN1 &&
14692       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14693        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14694     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14695 
14696   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14697   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
14698   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14699       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14700       TLI.isOperationLegal(ISD::FABS, VT)) {
14701     SDValue Select = N0, X = N1;
14702     if (Select.getOpcode() != ISD::SELECT)
14703       std::swap(Select, X);
14704 
14705     SDValue Cond = Select.getOperand(0);
14706     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14707     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14708 
14709     if (TrueOpnd && FalseOpnd &&
14710         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14711         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14712         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14713       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14714       switch (CC) {
14715       default: break;
14716       case ISD::SETOLT:
14717       case ISD::SETULT:
14718       case ISD::SETOLE:
14719       case ISD::SETULE:
14720       case ISD::SETLT:
14721       case ISD::SETLE:
14722         std::swap(TrueOpnd, FalseOpnd);
14723         LLVM_FALLTHROUGH;
14724       case ISD::SETOGT:
14725       case ISD::SETUGT:
14726       case ISD::SETOGE:
14727       case ISD::SETUGE:
14728       case ISD::SETGT:
14729       case ISD::SETGE:
14730         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14731             TLI.isOperationLegal(ISD::FNEG, VT))
14732           return DAG.getNode(ISD::FNEG, DL, VT,
14733                    DAG.getNode(ISD::FABS, DL, VT, X));
14734         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14735           return DAG.getNode(ISD::FABS, DL, VT, X);
14736 
14737         break;
14738       }
14739     }
14740   }
14741 
14742   // FMUL -> FMA combines:
14743   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14744     AddToWorklist(Fused.getNode());
14745     return Fused;
14746   }
14747 
14748   return SDValue();
14749 }
14750 
14751 SDValue DAGCombiner::visitFMA(SDNode *N) {
14752   SDValue N0 = N->getOperand(0);
14753   SDValue N1 = N->getOperand(1);
14754   SDValue N2 = N->getOperand(2);
14755   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14756   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14757   EVT VT = N->getValueType(0);
14758   SDLoc DL(N);
14759   const TargetOptions &Options = DAG.getTarget().Options;
14760   // FMA nodes have flags that propagate to the created nodes.
14761   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14762 
14763   bool UnsafeFPMath =
14764       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14765 
14766   // Constant fold FMA.
14767   if (isa<ConstantFPSDNode>(N0) &&
14768       isa<ConstantFPSDNode>(N1) &&
14769       isa<ConstantFPSDNode>(N2)) {
14770     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14771   }
14772 
14773   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14774   TargetLowering::NegatibleCost CostN0 =
14775       TargetLowering::NegatibleCost::Expensive;
14776   TargetLowering::NegatibleCost CostN1 =
14777       TargetLowering::NegatibleCost::Expensive;
14778   SDValue NegN0 =
14779       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14780   SDValue NegN1 =
14781       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14782   if (NegN0 && NegN1 &&
14783       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14784        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14785     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14786 
14787   if (UnsafeFPMath) {
14788     if (N0CFP && N0CFP->isZero())
14789       return N2;
14790     if (N1CFP && N1CFP->isZero())
14791       return N2;
14792   }
14793 
14794   if (N0CFP && N0CFP->isExactlyValue(1.0))
14795     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14796   if (N1CFP && N1CFP->isExactlyValue(1.0))
14797     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14798 
14799   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14800   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14801      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14802     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14803 
14804   if (UnsafeFPMath) {
14805     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14806     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14807         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14808         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14809       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14810                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14811     }
14812 
14813     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14814     if (N0.getOpcode() == ISD::FMUL &&
14815         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14816         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14817       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14818                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14819                          N2);
14820     }
14821   }
14822 
  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
14824   if (N1CFP) {
14825     if (N1CFP->isExactlyValue(1.0))
14826       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
14827 
14828     if (N1CFP->isExactlyValue(-1.0) &&
14829         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
14830       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
14831       AddToWorklist(RHSNeg.getNode());
14832       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
14833     }
14834 
    // fma (fneg x), K, y -> fma x, -K, y
14836     if (N0.getOpcode() == ISD::FNEG &&
14837         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14838          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
14839                                               ForCodeSize)))) {
14840       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14841                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
14842     }
14843   }
14844 
14845   if (UnsafeFPMath) {
14846     // (fma x, c, x) -> (fmul x, (c+1))
14847     if (N1CFP && N0 == N2) {
14848       return DAG.getNode(
14849           ISD::FMUL, DL, VT, N0,
14850           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
14851     }
14852 
14853     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
14854     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
14855       return DAG.getNode(
14856           ISD::FMUL, DL, VT, N0,
14857           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
14858     }
14859   }
14860 
14861   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
14862   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
14863   if (!TLI.isFNegFree(VT))
14864     if (SDValue Neg = TLI.getCheaperNegatedExpression(
14865             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
14866       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
14867   return SDValue();
14868 }
14869 
14870 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14871 // reciprocal.
14872 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
14873 // Notice that this is not always beneficial. One reason is different targets
14874 // may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is that the critical path is increased from "one FDIV" to "one FDIV + one
// FMUL".
14877 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
14878   // TODO: Limit this transform based on optsize/minsize - it always creates at
14879   //       least 1 extra instruction. But the perf win may be substantial enough
14880   //       that only minsize should restrict this.
14881   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
14882   const SDNodeFlags Flags = N->getFlags();
14883   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
14884     return SDValue();
14885 
14886   // Skip if current node is a reciprocal/fneg-reciprocal.
14887   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
14888   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14889   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14890     return SDValue();
14891 
14892   // Exit early if the target does not want this transform or if there can't
14893   // possibly be enough uses of the divisor to make the transform worthwhile.
14894   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14895 
14896   // For splat vectors, scale the number of uses by the splat factor. If we can
14897   // convert the division into a scalar op, that will likely be much faster.
14898   unsigned NumElts = 1;
14899   EVT VT = N->getValueType(0);
14900   if (VT.isVector() && DAG.isSplatValue(N1))
14901     NumElts = VT.getVectorMinNumElements();
14902 
14903   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14904     return SDValue();
14905 
14906   // Find all FDIV users of the same divisor.
14907   // Use a set because duplicates may be present in the user list.
14908   SetVector<SDNode *> Users;
14909   for (auto *U : N1->uses()) {
14910     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14911       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
14912       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14913           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14914           U->getFlags().hasAllowReassociation() &&
14915           U->getFlags().hasNoSignedZeros())
14916         continue;
14917 
14918       // This division is eligible for optimization only if global unsafe math
14919       // is enabled or if this division allows reciprocal formation.
14920       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14921         Users.insert(U);
14922     }
14923   }
14924 
14925   // Now that we have the actual number of divisor uses, make sure it meets
14926   // the minimum threshold specified by the target.
14927   if ((Users.size() * NumElts) < MinUses)
14928     return SDValue();
14929 
14930   SDLoc DL(N);
14931   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14932   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14933 
14934   // Dividend / Divisor -> Dividend * Reciprocal
14935   for (auto *U : Users) {
14936     SDValue Dividend = U->getOperand(0);
14937     if (Dividend != FPOne) {
14938       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14939                                     Reciprocal, Flags);
14940       CombineTo(U, NewNode);
14941     } else if (U != Reciprocal.getNode()) {
14942       // In the absence of fast-math-flags, this user node is always the
14943       // same node as Reciprocal, but with FMF they may be different nodes.
14944       CombineTo(U, Reciprocal);
14945     }
14946   }
14947   return SDValue(N, 0);  // N was replaced.
14948 }
14949 
14950 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14951   SDValue N0 = N->getOperand(0);
14952   SDValue N1 = N->getOperand(1);
14953   EVT VT = N->getValueType(0);
14954   SDLoc DL(N);
14955   const TargetOptions &Options = DAG.getTarget().Options;
14956   SDNodeFlags Flags = N->getFlags();
14957   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14958 
14959   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14960     return R;
14961 
14962   // fold (fdiv c1, c2) -> c1/c2
14963   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
14964     return C;
14965 
14966   // fold vector ops
14967   if (VT.isVector())
14968     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14969       return FoldedVOp;
14970 
14971   if (SDValue NewSel = foldBinOpIntoSelect(N))
14972     return NewSel;
14973 
14974   if (SDValue V = combineRepeatedFPDivisors(N))
14975     return V;
14976 
14977   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14978     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
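    // For example, (fdiv X, 4.0) becomes the exact (fmul X, 0.25), while
    // (fdiv X, 3.0) becomes a multiply by the rounded value of 1/3 and may
    // differ from the true quotient in the last bit.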
14979     if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14980       // Compute the reciprocal 1.0 / c2.
14981       const APFloat &N1APF = N1CFP->getValueAPF();
14982       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14983       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14984       // Only do the transform if the reciprocal is a legal fp immediate that
14985       // isn't too nasty (eg NaN, denormal, ...).
14986       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14987           (!LegalOperations ||
14988            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14989            // backend)... we should handle this gracefully after Legalize.
14990            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14991            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14992            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14993         return DAG.getNode(ISD::FMUL, DL, VT, N0,
14994                            DAG.getConstantFP(Recip, DL, VT));
14995     }
14996 
14997     // If this FDIV is part of a reciprocal square root, it may be folded
14998     // into a target-specific square root estimate instruction.
14999     if (N1.getOpcode() == ISD::FSQRT) {
15000       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
15001         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15002     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
15003                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15004       if (SDValue RV =
15005               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15006         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
15007         AddToWorklist(RV.getNode());
15008         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15009       }
15010     } else if (N1.getOpcode() == ISD::FP_ROUND &&
15011                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15012       if (SDValue RV =
15013               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15014         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
15015         AddToWorklist(RV.getNode());
15016         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15017       }
15018     } else if (N1.getOpcode() == ISD::FMUL) {
15019       // Look through an FMUL. Even though this won't remove the FDIV directly,
15020       // it's still worthwhile to get rid of the FSQRT if possible.
15021       SDValue Sqrt, Y;
15022       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15023         Sqrt = N1.getOperand(0);
15024         Y = N1.getOperand(1);
15025       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
15026         Sqrt = N1.getOperand(1);
15027         Y = N1.getOperand(0);
15028       }
15029       if (Sqrt.getNode()) {
15030         // If the other multiply operand is known positive, pull it into the
15031         // sqrt. That will eliminate the division if we convert to an estimate.
15032         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
15033             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
15034           SDValue A;
15035           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
15036             A = Y.getOperand(0);
15037           else if (Y == Sqrt.getOperand(0))
15038             A = Y;
15039           if (A) {
15040             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
15041             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
15042             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
15043             SDValue AAZ =
15044                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
15045             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
15046               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
15047 
15048             // Estimate creation failed. Clean up speculatively created nodes.
15049             recursivelyDeleteUnusedNodes(AAZ.getNode());
15050           }
15051         }
15052 
15053         // We found a FSQRT, so try to make this fold:
15054         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
15055         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
15056           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
15057           AddToWorklist(Div.getNode());
15058           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
15059         }
15060       }
15061     }
15062 
15063     // Fold into a reciprocal estimate and multiply instead of a real divide.
15064     if (Options.NoInfsFPMath || Flags.hasNoInfs())
15065       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
15066         return RV;
15067   }
15068 
15069   // Fold X/Sqrt(X) -> Sqrt(X)
15070   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
15071       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
15072     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
15073       return N1;
15074 
15075   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
15076   TargetLowering::NegatibleCost CostN0 =
15077       TargetLowering::NegatibleCost::Expensive;
15078   TargetLowering::NegatibleCost CostN1 =
15079       TargetLowering::NegatibleCost::Expensive;
15080   SDValue NegN0 =
15081       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15082   SDValue NegN1 =
15083       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15084   if (NegN0 && NegN1 &&
15085       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15086        CostN1 == TargetLowering::NegatibleCost::Cheaper))
15087     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
15088 
15089   return SDValue();
15090 }
15091 
15092 SDValue DAGCombiner::visitFREM(SDNode *N) {
15093   SDValue N0 = N->getOperand(0);
15094   SDValue N1 = N->getOperand(1);
15095   EVT VT = N->getValueType(0);
15096   SDNodeFlags Flags = N->getFlags();
15097   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15098 
15099   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15100     return R;
15101 
15102   // fold (frem c1, c2) -> fmod(c1,c2)
15103   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
15104     return C;
15105 
15106   if (SDValue NewSel = foldBinOpIntoSelect(N))
15107     return NewSel;
15108 
15109   return SDValue();
15110 }
15111 
15112 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
15113   SDNodeFlags Flags = N->getFlags();
15114   const TargetOptions &Options = DAG.getTarget().Options;
15115 
15116   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
15117   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
15118   if (!Flags.hasApproximateFuncs() ||
15119       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
15120     return SDValue();
15121 
15122   SDValue N0 = N->getOperand(0);
15123   if (TLI.isFsqrtCheap(N0, DAG))
15124     return SDValue();
15125 
15126   // FSQRT nodes have flags that propagate to the created nodes.
15127   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
15128   //       transform the fdiv, we may produce a sub-optimal estimate sequence
15129   //       because the reciprocal calculation may not have to filter out a
15130   //       0.0 input.
15131   return buildSqrtEstimate(N0, Flags);
15132 }
15133 
15134 /// copysign(x, fp_extend(y)) -> copysign(x, y)
15135 /// copysign(x, fp_round(y)) -> copysign(x, y)
15136 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
15137   SDValue N1 = N->getOperand(1);
15138   if ((N1.getOpcode() == ISD::FP_EXTEND ||
15139        N1.getOpcode() == ISD::FP_ROUND)) {
15140     EVT N1VT = N1->getValueType(0);
15141     EVT N1Op0VT = N1->getOperand(0).getValueType();
15142 
15143     // Always fold no-op FP casts.
15144     if (N1VT == N1Op0VT)
15145       return true;
15146 
    // Do not optimize out the type conversion of f128 values yet.
    // For some targets like x86_64, the configuration keeps one f128 value in
    // one SSE register, but instruction selection cannot handle FCOPYSIGN on
    // SSE registers yet.
15151     if (N1Op0VT == MVT::f128)
15152       return false;
15153 
15154     // Avoid mismatched vector operand types, for better instruction selection.
15155     if (N1Op0VT.isVector())
15156       return false;
15157 
15158     return true;
15159   }
15160   return false;
15161 }
15162 
15163 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
15164   SDValue N0 = N->getOperand(0);
15165   SDValue N1 = N->getOperand(1);
15166   EVT VT = N->getValueType(0);
15167 
15168   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
15169   if (SDValue C =
15170           DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
15171     return C;
15172 
15173   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
15174     const APFloat &V = N1C->getValueAPF();
15175     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
15176     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
15177     if (!V.isNegative()) {
15178       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
15179         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15180     } else {
15181       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
15182         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
15183                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
15184     }
15185   }
15186 
15187   // copysign(fabs(x), y) -> copysign(x, y)
15188   // copysign(fneg(x), y) -> copysign(x, y)
15189   // copysign(copysign(x,z), y) -> copysign(x, y)
15190   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
15191       N0.getOpcode() == ISD::FCOPYSIGN)
15192     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
15193 
15194   // copysign(x, abs(y)) -> abs(x)
15195   if (N1.getOpcode() == ISD::FABS)
15196     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15197 
15198   // copysign(x, copysign(y,z)) -> copysign(x, z)
15199   if (N1.getOpcode() == ISD::FCOPYSIGN)
15200     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
15201 
15202   // copysign(x, fp_extend(y)) -> copysign(x, y)
15203   // copysign(x, fp_round(y)) -> copysign(x, y)
15204   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
15205     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
15206 
15207   return SDValue();
15208 }
15209 
15210 SDValue DAGCombiner::visitFPOW(SDNode *N) {
15211   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
15212   if (!ExponentC)
15213     return SDValue();
15214   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15215 
15216   // Try to convert x ** (1/3) into cube root.
15217   // TODO: Handle the various flavors of long double.
15218   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
15219   //       Some range near 1/3 should be fine.
15220   EVT VT = N->getValueType(0);
15221   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
15222       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
15223     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
15224     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
15226     // For regular numbers, rounding may cause the results to differ.
15227     // Therefore, we require { nsz ninf nnan afn } for this transform.
15228     // TODO: We could select out the special cases if we don't have nsz/ninf.
15229     SDNodeFlags Flags = N->getFlags();
15230     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
15231         !Flags.hasApproximateFuncs())
15232       return SDValue();
15233 
15234     // Do not create a cbrt() libcall if the target does not have it, and do not
15235     // turn a pow that has lowering support into a cbrt() libcall.
15236     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
15237         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
15238          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
15239       return SDValue();
15240 
15241     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
15242   }
15243 
15244   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
15245   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
15246   // TODO: This could be extended (using a target hook) to handle smaller
15247   // power-of-2 fractional exponents.
15248   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
15249   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
15250   if (ExponentIs025 || ExponentIs075) {
15251     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15252     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
15253     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15254     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
15255     // For regular numbers, rounding may cause the results to differ.
15256     // Therefore, we require { nsz ninf afn } for this transform.
15257     // TODO: We could select out the special cases if we don't have nsz/ninf.
15258     SDNodeFlags Flags = N->getFlags();
15259 
15260     // We only need no signed zeros for the 0.25 case.
15261     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15262         !Flags.hasApproximateFuncs())
15263       return SDValue();
15264 
15265     // Don't double the number of libcalls. We are trying to inline fast code.
15266     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
15267       return SDValue();
15268 
15269     // Assume that libcalls are the smallest code.
15270     // TODO: This restriction should probably be lifted for vectors.
15271     if (ForCodeSize)
15272       return SDValue();
15273 
15274     // pow(X, 0.25) --> sqrt(sqrt(X))
15275     SDLoc DL(N);
15276     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15277     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15278     if (ExponentIs025)
15279       return SqrtSqrt;
15280     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15281     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15282   }
15283 
15284   return SDValue();
15285 }
15286 
15287 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
15288                                const TargetLowering &TLI) {
15289   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15290   // replacing casts with a libcall. We also must be allowed to ignore -0.0
15291   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15292   // conversions would return +0.0.
15293   // FIXME: We should be able to use node-level FMF here.
15294   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
15295   EVT VT = N->getValueType(0);
15296   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15297       !DAG.getTarget().Options.NoSignedZerosFPMath)
15298     return SDValue();
15299 
15300   // fptosi/fptoui round towards zero, so converting from FP to integer and
15301   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
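  // For example, (sint_to_fp (fp_to_sint 3.7)) == 3.0 == (ftrunc 3.7).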
15302   SDValue N0 = N->getOperand(0);
15303   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15304       N0.getOperand(0).getValueType() == VT)
15305     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15306 
15307   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15308       N0.getOperand(0).getValueType() == VT)
15309     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15310 
15311   return SDValue();
15312 }
15313 
15314 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15315   SDValue N0 = N->getOperand(0);
15316   EVT VT = N->getValueType(0);
15317   EVT OpVT = N0.getValueType();
15318 
15319   // [us]itofp(undef) = 0, because the result value is bounded.
15320   if (N0.isUndef())
15321     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15322 
15323   // fold (sint_to_fp c1) -> c1fp
15324   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15325       // ...but only if the target supports immediate floating-point values
15326       (!LegalOperations ||
15327        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15328     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15329 
15330   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15331   // but UINT_TO_FP is legal on this target, try to convert.
15332   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15333       hasOperation(ISD::UINT_TO_FP, OpVT)) {
15334     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15335     if (DAG.SignBitIsZero(N0))
15336       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15337   }
15338 
15339   // The next optimizations are desirable only if SELECT_CC can be lowered.
15340   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15341   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15342       !VT.isVector() &&
15343       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15344     SDLoc DL(N);
15345     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15346                          DAG.getConstantFP(0.0, DL, VT));
15347   }
15348 
15349   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15350   //      (select (setcc x, y, cc), 1.0, 0.0)
15351   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15352       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15353       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15354     SDLoc DL(N);
15355     return DAG.getSelect(DL, VT, N0.getOperand(0),
15356                          DAG.getConstantFP(1.0, DL, VT),
15357                          DAG.getConstantFP(0.0, DL, VT));
15358   }
15359 
15360   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15361     return FTrunc;
15362 
15363   return SDValue();
15364 }
15365 
15366 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15367   SDValue N0 = N->getOperand(0);
15368   EVT VT = N->getValueType(0);
15369   EVT OpVT = N0.getValueType();
15370 
15371   // [us]itofp(undef) = 0, because the result value is bounded.
15372   if (N0.isUndef())
15373     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15374 
15375   // fold (uint_to_fp c1) -> c1fp
15376   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15377       // ...but only if the target supports immediate floating-point values
15378       (!LegalOperations ||
15379        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15380     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15381 
15382   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15383   // but SINT_TO_FP is legal on this target, try to convert.
15384   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15385       hasOperation(ISD::SINT_TO_FP, OpVT)) {
15386     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15387     if (DAG.SignBitIsZero(N0))
15388       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15389   }
15390 
15391   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15392   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15393       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15394     SDLoc DL(N);
15395     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15396                          DAG.getConstantFP(0.0, DL, VT));
15397   }
15398 
15399   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15400     return FTrunc;
15401 
15402   return SDValue();
15403 }
15404 
15405 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15406 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15407   SDValue N0 = N->getOperand(0);
15408   EVT VT = N->getValueType(0);
15409 
15410   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15411     return SDValue();
15412 
15413   SDValue Src = N0.getOperand(0);
15414   EVT SrcVT = Src.getValueType();
15415   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15416   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15417 
15418   // We can safely assume the conversion won't overflow the output range,
15419   // because (for example) (uint8_t)18293.f is undefined behavior.
15420 
15421   // Since we can assume the conversion won't overflow, our decision as to
15422   // whether the input will fit in the float should depend on the minimum
15423   // of the input range and output range.
15424 
15425   // This means this is also safe for a signed input and unsigned output, since
15426   // a negative input would lead to undefined behavior.
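        // Worked example of the precision check below (assuming the FP type
        // is f32, whose significand holds 24 bits): for
        // (fp_to_sint i32 (sint_to_fp f32 i16:x)), min(InputSize, OutputSize)
        // = min(15, 31) = 15 <= 24, so the round trip is exact and folds to
        // (sext x). With an i32 input, InputSize = 31 > 24, so no fold.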
15427   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15428   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
15429   unsigned ActualSize = std::min(InputSize, OutputSize);
15430   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15431 
15432   // We can only fold away the float conversion if the input range can be
15433   // represented exactly in the float range.
15434   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15435     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15436       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15437                                                        : ISD::ZERO_EXTEND;
15438       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15439     }
15440     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15441       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15442     return DAG.getBitcast(VT, Src);
15443   }
15444   return SDValue();
15445 }
15446 
15447 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15448   SDValue N0 = N->getOperand(0);
15449   EVT VT = N->getValueType(0);
15450 
15451   // fold (fp_to_sint undef) -> undef
15452   if (N0.isUndef())
15453     return DAG.getUNDEF(VT);
15454 
15455   // fold (fp_to_sint c1fp) -> c1
15456   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15457     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15458 
15459   return FoldIntToFPToInt(N, DAG);
15460 }
15461 
15462 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15463   SDValue N0 = N->getOperand(0);
15464   EVT VT = N->getValueType(0);
15465 
15466   // fold (fp_to_uint undef) -> undef
15467   if (N0.isUndef())
15468     return DAG.getUNDEF(VT);
15469 
15470   // fold (fp_to_uint c1fp) -> c1
15471   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15472     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15473 
15474   return FoldIntToFPToInt(N, DAG);
15475 }
15476 
15477 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15478   SDValue N0 = N->getOperand(0);
15479   SDValue N1 = N->getOperand(1);
15480   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15481   EVT VT = N->getValueType(0);
15482 
15483   // fold (fp_round c1fp) -> c1fp
15484   if (N0CFP)
15485     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15486 
15487   // fold (fp_round (fp_extend x)) -> x
15488   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15489     return N0.getOperand(0);
15490 
15491   // fold (fp_round (fp_round x)) -> (fp_round x)
15492   if (N0.getOpcode() == ISD::FP_ROUND) {
15493     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15494     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15495 
15496     // Skip this folding if it results in an fp_round from f80 to f16.
15497     //
15498     // f80 to f16 always generates an expensive (and as yet, unimplemented)
15499     // libcall to __truncxfhf2 instead of selecting native f16 conversion
15500     // instructions from f32 or f64.  Moreover, the first (value-preserving)
15501     // fp_round from f80 to either f32 or f64 may become a NOP on platforms
15502     // like x86.
15503     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15504       return SDValue();
15505 
15506     // If the first fp_round isn't a value preserving truncation, it might
15507     // introduce a tie in the second fp_round that wouldn't occur in the
15508     // single-step fp_round we want to fold to.
15509     // In other words, double rounding isn't the same as rounding.
15510     // Also, this is a value preserving truncation iff both fp_round's are.
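          // Decimal analogy: rounding 1.49 directly to an integer gives 1,
          // but rounding to one decimal first gives 1.5, which then rounds
          // to 2.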
15511     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15512       SDLoc DL(N);
15513       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15514                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15515     }
15516   }
15517 
15518   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15519   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
15520     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15521                               N0.getOperand(0), N1);
15522     AddToWorklist(Tmp.getNode());
15523     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15524                        Tmp, N0.getOperand(1));
15525   }
15526 
15527   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15528     return NewVSel;
15529 
15530   return SDValue();
15531 }
15532 
15533 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15534   SDValue N0 = N->getOperand(0);
15535   EVT VT = N->getValueType(0);
15536 
15537   // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
15538   if (N->hasOneUse() &&
15539       N->use_begin()->getOpcode() == ISD::FP_ROUND)
15540     return SDValue();
15541 
15542   // fold (fp_extend c1fp) -> c1fp
15543   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15544     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15545 
15546   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15547   if (N0.getOpcode() == ISD::FP16_TO_FP &&
15548       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15549     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15550 
15551   // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15552   // value of X.
15553   if (N0.getOpcode() == ISD::FP_ROUND
15554       && N0.getConstantOperandVal(1) == 1) {
15555     SDValue In = N0.getOperand(0);
15556     if (In.getValueType() == VT) return In;
15557     if (VT.bitsLT(In.getValueType()))
15558       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15559                          In, N0.getOperand(1));
15560     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15561   }
15562 
15563   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
15564   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15565       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15566     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15567     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15568                                      LN0->getChain(),
15569                                      LN0->getBasePtr(), N0.getValueType(),
15570                                      LN0->getMemOperand());
15571     CombineTo(N, ExtLoad);
15572     CombineTo(N0.getNode(),
15573               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15574                           N0.getValueType(), ExtLoad,
15575                           DAG.getIntPtrConstant(1, SDLoc(N0))),
15576               ExtLoad.getValue(1));
15577     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15578   }
15579 
15580   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15581     return NewVSel;
15582 
15583   return SDValue();
15584 }
15585 
15586 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15587   SDValue N0 = N->getOperand(0);
15588   EVT VT = N->getValueType(0);
15589 
15590   // fold (fceil c1) -> fceil(c1)
15591   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15592     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15593 
15594   return SDValue();
15595 }
15596 
15597 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15598   SDValue N0 = N->getOperand(0);
15599   EVT VT = N->getValueType(0);
15600 
15601   // fold (ftrunc c1) -> ftrunc(c1)
15602   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15603     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15604 
15605   // fold ftrunc (known rounded int x) -> x
15606   // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
15607   // likely to be generated to extract an integer from a rounded FP value.
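        // For example, (ftrunc (ffloor X)) -> (ffloor X): ffloor already
        // yields an integral value, so truncating toward zero is a no-op.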
15608   switch (N0.getOpcode()) {
15609   default: break;
15610   case ISD::FRINT:
15611   case ISD::FTRUNC:
15612   case ISD::FNEARBYINT:
15613   case ISD::FFLOOR:
15614   case ISD::FCEIL:
15615     return N0;
15616   }
15617 
15618   return SDValue();
15619 }
15620 
15621 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15622   SDValue N0 = N->getOperand(0);
15623   EVT VT = N->getValueType(0);
15624 
15625   // fold (ffloor c1) -> ffloor(c1)
15626   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15627     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15628 
15629   return SDValue();
15630 }
15631 
15632 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15633   SDValue N0 = N->getOperand(0);
15634   EVT VT = N->getValueType(0);
15635   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15636 
15637   // Constant fold FNEG.
15638   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15639     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15640 
15641   if (SDValue NegN0 =
15642           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15643     return NegN0;
15644 
15645   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15646   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15647   // know it was called from a context with a nsz flag if the input fsub does
15648   // not.
15649   if (N0.getOpcode() == ISD::FSUB &&
15650       (DAG.getTarget().Options.NoSignedZerosFPMath ||
15651        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15652     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15653                        N0.getOperand(0));
15654   }
15655 
15656   if (SDValue Cast = foldSignChangeInBitcast(N))
15657     return Cast;
15658 
15659   return SDValue();
15660 }
15661 
15662 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15663   SDValue N0 = N->getOperand(0);
15664   SDValue N1 = N->getOperand(1);
15665   EVT VT = N->getValueType(0);
15666   const SDNodeFlags Flags = N->getFlags();
15667   unsigned Opc = N->getOpcode();
15668   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15669   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15670   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15671 
15672   // Constant fold.
15673   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15674     return C;
15675 
15676   // Canonicalize to constant on RHS.
15677   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15678       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15679     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15680 
15681   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15682     const APFloat &AF = N1CFP->getValueAPF();
15683 
15684     // minnum(X, nan) -> X
15685     // maxnum(X, nan) -> X
15686     // minimum(X, nan) -> nan
15687     // maximum(X, nan) -> nan
15688     if (AF.isNaN())
15689       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15690 
15691     // In the following folds, inf can be replaced with the largest finite
15692     // float, if the ninf flag is set.
15693     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15694       // minnum(X, -inf) -> -inf
15695       // maxnum(X, +inf) -> +inf
15696       // minimum(X, -inf) -> -inf if nnan
15697       // maximum(X, +inf) -> +inf if nnan
15698       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15699         return N->getOperand(1);
15700 
15701       // minnum(X, +inf) -> X if nnan
15702       // maxnum(X, -inf) -> X if nnan
15703       // minimum(X, +inf) -> X
15704       // maximum(X, -inf) -> X
15705       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15706         return N->getOperand(0);
15707     }
15708   }
15709 
15710   return SDValue();
15711 }
15712 
15713 SDValue DAGCombiner::visitFABS(SDNode *N) {
15714   SDValue N0 = N->getOperand(0);
15715   EVT VT = N->getValueType(0);
15716 
15717   // fold (fabs c1) -> fabs(c1)
15718   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15719     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15720 
15721   // fold (fabs (fabs x)) -> (fabs x)
15722   if (N0.getOpcode() == ISD::FABS)
15723     return N->getOperand(0);
15724 
15725   // fold (fabs (fneg x)) -> (fabs x)
15726   // fold (fabs (fcopysign x, y)) -> (fabs x)
15727   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15728     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15729 
15730   if (SDValue Cast = foldSignChangeInBitcast(N))
15731     return Cast;
15732 
15733   return SDValue();
15734 }
15735 
15736 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15737   SDValue Chain = N->getOperand(0);
15738   SDValue N1 = N->getOperand(1);
15739   SDValue N2 = N->getOperand(2);
15740 
15741   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15742   // nondeterministic jumps).
15743   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15744     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15745                        N1->getOperand(0), N2);
15746   }
15747 
15748   // If N is a constant we could fold this into a fallthrough or unconditional
15749   // branch. However that doesn't happen very often in normal code, because
15750   // Instcombine/SimplifyCFG should have handled the available opportunities.
15751   // If we did this folding here, it would be necessary to update the
15752   // MachineBasicBlock CFG, which is awkward.
15753 
15754   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15755   // on the target.
15756   if (N1.getOpcode() == ISD::SETCC &&
15757       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15758                                    N1.getOperand(0).getValueType())) {
15759     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15760                        Chain, N1.getOperand(2),
15761                        N1.getOperand(0), N1.getOperand(1), N2);
15762   }
15763 
15764   if (N1.hasOneUse()) {
15765     // rebuildSetCC calls visitXor which may change the Chain when there is a
15766     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15767     HandleSDNode ChainHandle(Chain);
15768     if (SDValue NewN1 = rebuildSetCC(N1))
15769       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15770                          ChainHandle.getValue(), NewN1, N2);
15771   }
15772 
15773   return SDValue();
15774 }
15775 
15776 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15777   if (N.getOpcode() == ISD::SRL ||
15778       (N.getOpcode() == ISD::TRUNCATE &&
15779        (N.getOperand(0).hasOneUse() &&
15780         N.getOperand(0).getOpcode() == ISD::SRL))) {
15781     // Look past the truncate.
15782     if (N.getOpcode() == ISD::TRUNCATE)
15783       N = N.getOperand(0);
15784 
15785     // Match this pattern so that we can generate simpler code:
15786     //
15787     //   %a = ...
15788     //   %b = and i32 %a, 2
15789     //   %c = srl i32 %b, 1
15790     //   brcond i32 %c ...
15791     //
15792     // into
15793     //
15794     //   %a = ...
15795     //   %b = and i32 %a, 2
15796     //   %c = setcc eq %b, 0
15797     //   brcond %c ...
15798     //
15799     // This applies only when the AND constant value has one bit set and the
15800     // SRL constant is equal to the log2 of the AND constant. The back-end is
15801     // smart enough to convert the result into a TEST/JMP sequence.
15802     SDValue Op0 = N.getOperand(0);
15803     SDValue Op1 = N.getOperand(1);
15804 
15805     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15806       SDValue AndOp1 = Op0.getOperand(1);
15807 
15808       if (AndOp1.getOpcode() == ISD::Constant) {
15809         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15810 
15811         if (AndConst.isPowerOf2() &&
15812             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15813           SDLoc DL(N);
15814           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15815                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15816                               ISD::SETNE);
15817         }
15818       }
15819     }
15820   }
15821 
15822   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
15823   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
15824   if (N.getOpcode() == ISD::XOR) {
15825     // Because we may call this on a speculatively constructed
15826     // SimplifiedSetCC Node, we need to simplify this node first.
15827     // Ideally this should be folded into SimplifySetCC and not
15828     // here. For now, grab a handle to N so we don't lose it from
15829     // replacements internal to the visit.
15830     HandleSDNode XORHandle(N);
15831     while (N.getOpcode() == ISD::XOR) {
15832       SDValue Tmp = visitXOR(N.getNode());
15833       // No simplification done.
15834       if (!Tmp.getNode())
15835         break;
15836       // Returning N is a form of in-visit replacement that may
15837       // invalidate N. Grab the value from the handle.
15838       if (Tmp.getNode() == N.getNode())
15839         N = XORHandle.getValue();
15840       else // Node simplified. Try simplifying again.
15841         N = Tmp;
15842     }
15843 
15844     if (N.getOpcode() != ISD::XOR)
15845       return N;
15846 
15847     SDValue Op0 = N->getOperand(0);
15848     SDValue Op1 = N->getOperand(1);
15849 
15850     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
15851       bool Equal = false;
15852       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
15853       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
15854           Op0.getValueType() == MVT::i1) {
15855         N = Op0;
15856         Op0 = N->getOperand(0);
15857         Op1 = N->getOperand(1);
15858         Equal = true;
15859       }
15860 
15861       EVT SetCCVT = N.getValueType();
15862       if (LegalTypes)
15863         SetCCVT = getSetCCResultType(SetCCVT);
15864       // Replace the uses of XOR with SETCC
15865       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15866                           Equal ? ISD::SETEQ : ISD::SETNE);
15867     }
15868   }
15869 
15870   return SDValue();
15871 }
15872 
15873 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15874 //
15875 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15876   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15877   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15878 
15879   // If N is a constant we could fold this into a fallthrough or unconditional
15880   // branch. However that doesn't happen very often in normal code, because
15881   // Instcombine/SimplifyCFG should have handled the available opportunities.
15882   // If we did this folding here, it would be necessary to update the
15883   // MachineBasicBlock CFG, which is awkward.
15884 
15885   // Use SimplifySetCC to simplify SETCC's.
15886   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15887                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15888                                false);
15889   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15890 
15891   // fold to a simpler setcc
15892   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15893     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15894                        N->getOperand(0), Simp.getOperand(2),
15895                        Simp.getOperand(0), Simp.getOperand(1),
15896                        N->getOperand(4));
15897 
15898   return SDValue();
15899 }
15900 
15901 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15902                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15903                                      const TargetLowering &TLI) {
15904   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15905     if (LD->isIndexed())
15906       return false;
15907     EVT VT = LD->getMemoryVT();
15908     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15909       return false;
15910     Ptr = LD->getBasePtr();
15911   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15912     if (ST->isIndexed())
15913       return false;
15914     EVT VT = ST->getMemoryVT();
15915     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15916       return false;
15917     Ptr = ST->getBasePtr();
15918     IsLoad = false;
15919   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15920     if (LD->isIndexed())
15921       return false;
15922     EVT VT = LD->getMemoryVT();
15923     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15924         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15925       return false;
15926     Ptr = LD->getBasePtr();
15927     IsMasked = true;
15928   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15929     if (ST->isIndexed())
15930       return false;
15931     EVT VT = ST->getMemoryVT();
15932     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15933         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15934       return false;
15935     Ptr = ST->getBasePtr();
15936     IsLoad = false;
15937     IsMasked = true;
15938   } else {
15939     return false;
15940   }
15941   return true;
15942 }
15943 
15944 /// Try turning a load/store into a pre-indexed load/store when the base
15945 /// pointer is an add or subtract and it has other uses besides the load/store.
15946 /// After the transformation, the new indexed load/store has effectively folded
15947 /// the add/subtract in and all of its other uses are redirected to the
15948 /// new load/store.
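      /// As a sketch (assuming a target with pre-indexed forms, e.g. AArch64's
      /// writeback addressing): (store val, (add base, 16)) whose add has other
      /// users becomes a pre-indexed store that also produces the updated
      /// pointer, and those other users are rewritten to consume that result.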
15949 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15950   if (Level < AfterLegalizeDAG)
15951     return false;
15952 
15953   bool IsLoad = true;
15954   bool IsMasked = false;
15955   SDValue Ptr;
15956   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15957                                 Ptr, TLI))
15958     return false;
15959 
15960   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15961   // out.  There is no reason to make this a preinc/predec.
15962   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15963       Ptr->hasOneUse())
15964     return false;
15965 
15966   // Ask the target to do addressing mode selection.
15967   SDValue BasePtr;
15968   SDValue Offset;
15969   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15970   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15971     return false;
15972 
15973   // Backends without true r+i pre-indexed forms may need to pass a
15974   // constant base with a variable offset so that constant coercion
15975   // will work with the patterns in canonical form.
15976   bool Swapped = false;
15977   if (isa<ConstantSDNode>(BasePtr)) {
15978     std::swap(BasePtr, Offset);
15979     Swapped = true;
15980   }
15981 
15982   // Don't create an indexed load / store with zero offset.
15983   if (isNullConstant(Offset))
15984     return false;
15985 
15986   // Try turning it into a pre-indexed load / store except when:
15987   // 1) The new base ptr is a frame index.
15988   // 2) If N is a store and the new base ptr is either the same as or is a
15989   //    predecessor of the value being stored.
15990   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15991   //    that would create a cycle.
15992   // 4) All uses are load / store ops that use it as old base ptr.
15993 
15994   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15995   // (plus the implicit offset) to a register to preinc anyway.
15996   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15997     return false;
15998 
15999   // Check #2.
16000   if (!IsLoad) {
16001     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
16002                            : cast<StoreSDNode>(N)->getValue();
16003 
16004     // Would require a copy.
16005     if (Val == BasePtr)
16006       return false;
16007 
16008     // Would create a cycle.
16009     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
16010       return false;
16011   }
16012 
16013   // Caches for hasPredecessorHelper.
16014   SmallPtrSet<const SDNode *, 32> Visited;
16015   SmallVector<const SDNode *, 16> Worklist;
16016   Worklist.push_back(N);
16017 
16018   // If the offset is a constant, there may be other adds of constants that
16019   // can be folded with this one. We should do this to avoid having to keep
16020   // a copy of the original base pointer.
16021   SmallVector<SDNode *, 16> OtherUses;
16022   if (isa<ConstantSDNode>(Offset))
16023     for (SDNode::use_iterator UI = BasePtr->use_begin(),
16024                               UE = BasePtr->use_end();
16025          UI != UE; ++UI) {
16026       SDUse &Use = UI.getUse();
16027       // Skip the use that is Ptr and uses of other results from BasePtr's
16028       // node (important for nodes that return multiple results).
16029       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
16030         continue;
16031 
16032       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
16033         continue;
16034 
16035       if (Use.getUser()->getOpcode() != ISD::ADD &&
16036           Use.getUser()->getOpcode() != ISD::SUB) {
16037         OtherUses.clear();
16038         break;
16039       }
16040 
16041       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
16042       if (!isa<ConstantSDNode>(Op1)) {
16043         OtherUses.clear();
16044         break;
16045       }
16046 
16047       // FIXME: In some cases, we can be smarter about this.
16048       if (Op1.getValueType() != Offset.getValueType()) {
16049         OtherUses.clear();
16050         break;
16051       }
16052 
16053       OtherUses.push_back(Use.getUser());
16054     }
16055 
16056   if (Swapped)
16057     std::swap(BasePtr, Offset);
16058 
16059   // Now check for #3 and #4.
16060   bool RealUse = false;
16061 
16062   for (SDNode *Use : Ptr->uses()) {
16063     if (Use == N)
16064       continue;
16065     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
16066       return false;
16067 
16068     // If Ptr may be folded in addressing mode of other use, then it's
16069     // not profitable to do this transformation.
16070     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
16071       RealUse = true;
16072   }
16073 
16074   if (!RealUse)
16075     return false;
16076 
16077   SDValue Result;
16078   if (!IsMasked) {
16079     if (IsLoad)
16080       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16081     else
16082       Result =
16083           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16084   } else {
16085     if (IsLoad)
16086       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16087                                         Offset, AM);
16088     else
16089       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
16090                                          Offset, AM);
16091   }
16092   ++PreIndexedNodes;
16093   ++NodesCombined;
16094   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
16095              Result.dump(&DAG); dbgs() << '\n');
16096   WorklistRemover DeadNodes(*this);
16097   if (IsLoad) {
16098     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16099     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16100   } else {
16101     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16102   }
16103 
16104   // Finally, since the node is now dead, remove it from the graph.
16105   deleteAndRecombine(N);
16106 
16107   if (Swapped)
16108     std::swap(BasePtr, Offset);
16109 
16110   // Replace other uses of BasePtr that can be updated to use Ptr
16111   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
16112     unsigned OffsetIdx = 1;
16113     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
16114       OffsetIdx = 0;
16115     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
16116            BasePtr.getNode() && "Expected BasePtr operand");
16117 
16118     // We need to replace ptr0 in the following expression:
16119     //   x0 * offset0 + y0 * ptr0 = t0
16120     // knowing that
16121     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
16122     //
16123     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
16124     // indexed load/store and the expression that needs to be re-written.
16125     //
16126     // Therefore, we have:
16127     //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
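          // For instance (hypothetical values): if the indexed access computed
          // t1 = ptr0 + 4 (x1 = y1 = 1, offset1 = 4) and another use computed
          // t0 = ptr0 + 12 (x0 = y0 = 1, offset0 = 12), then
          // t0 = (12 - 4) + t1, i.e. an ADD of t1 and the constant 8.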
16128 
16129     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
16130     const APInt &Offset0 = CN->getAPIntValue();
16131     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
16132     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
16133     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
16134     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
16135     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
16136 
16137     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
16138 
16139     APInt CNV = Offset0;
16140     if (X0 < 0) CNV = -CNV;
16141     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
16142     else CNV = CNV - Offset1;
16143 
16144     SDLoc DL(OtherUses[i]);
16145 
16146     // We can now generate the new expression.
16147     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
16148     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
16149 
16150     SDValue NewUse = DAG.getNode(Opcode,
16151                                  DL,
16152                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
16153     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
16154     deleteAndRecombine(OtherUses[i]);
16155   }
16156 
16157   // Replace the uses of Ptr with uses of the updated base value.
16158   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
16159   deleteAndRecombine(Ptr.getNode());
16160   AddToWorklist(Result.getNode());
16161 
16162   return true;
16163 }
16164 
16165 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
16166                                    SDValue &BasePtr, SDValue &Offset,
16167                                    ISD::MemIndexedMode &AM,
16168                                    SelectionDAG &DAG,
16169                                    const TargetLowering &TLI) {
16170   if (PtrUse == N ||
16171       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
16172     return false;
16173 
16174   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
16175     return false;
16176 
16177   // Don't create an indexed load / store with zero offset.
16178   if (isNullConstant(Offset))
16179     return false;
16180 
16181   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16182     return false;
16183 
16184   SmallPtrSet<const SDNode *, 32> Visited;
16185   for (SDNode *Use : BasePtr->uses()) {
16186     if (Use == Ptr.getNode())
16187       continue;
16188 
16189     // Don't combine if a later memory user could perform the indexing instead.
16190     if (isa<MemSDNode>(Use)) {
16191       bool IsLoad = true;
16192       bool IsMasked = false;
16193       SDValue OtherPtr;
16194       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16195                                    IsMasked, OtherPtr, TLI)) {
16196         SmallVector<const SDNode *, 2> Worklist;
16197         Worklist.push_back(Use);
16198         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
16199           return false;
16200       }
16201     }
16202 
16203     // If all the uses are load / store addresses, then don't do the
16204     // transformation.
16205     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
16206       for (SDNode *UseUse : Use->uses())
16207         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
16208           return false;
16209     }
16210   }
16211   return true;
16212 }
16213 
16214 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
16215                                          bool &IsMasked, SDValue &Ptr,
16216                                          SDValue &BasePtr, SDValue &Offset,
16217                                          ISD::MemIndexedMode &AM,
16218                                          SelectionDAG &DAG,
16219                                          const TargetLowering &TLI) {
16220   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16221                                 IsMasked, Ptr, TLI) ||
16222       Ptr->hasOneUse())
16223     return nullptr;
16224 
16225   // Try turning it into a post-indexed load / store except when
16226   // 1) All uses are load / store ops that use it as base ptr (and
16227   //    it may be folded as the addressing mode).
16228   // 2) Op must be independent of N, i.e. Op is neither a predecessor
16229   //    nor a successor of N. Otherwise, if Op is folded that would
16230   //    create a cycle.
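        // For example, (load p) with an independent user (add p, 4) can become
        // a post-indexed load producing both the loaded value and the
        // incremented pointer p + 4.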
16231   for (SDNode *Op : Ptr->uses()) {
16232     // Check for #1.
16233     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
16234       continue;
16235 
16236     // Check for #2.
16237     SmallPtrSet<const SDNode *, 32> Visited;
16238     SmallVector<const SDNode *, 8> Worklist;
16239     // Ptr is predecessor to both N and Op.
16240     Visited.insert(Ptr.getNode());
16241     Worklist.push_back(N);
16242     Worklist.push_back(Op);
16243     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
16244         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
16245       return Op;
16246   }
16247   return nullptr;
16248 }
16249 
16250 /// Try to combine a load/store with an add/sub of the base pointer node into
16251 /// a post-indexed load/store. The transformation effectively folds the
16252 /// add/subtract into the new indexed load/store, and all other uses of the
16253 /// add/subtract are redirected to the new load/store.
16254 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
16255   if (Level < AfterLegalizeDAG)
16256     return false;
16257 
16258   bool IsLoad = true;
16259   bool IsMasked = false;
16260   SDValue Ptr;
16261   SDValue BasePtr;
16262   SDValue Offset;
16263   ISD::MemIndexedMode AM = ISD::UNINDEXED;
16264   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16265                                          Offset, AM, DAG, TLI);
16266   if (!Op)
16267     return false;
16268 
16269   SDValue Result;
16270   if (!IsMasked)
16271     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16272                                          Offset, AM)
16273                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16274                                           BasePtr, Offset, AM);
16275   else
16276     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16277                                                BasePtr, Offset, AM)
16278                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16279                                                 BasePtr, Offset, AM);
16280   ++PostIndexedNodes;
16281   ++NodesCombined;
16282   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
16283              Result.dump(&DAG); dbgs() << '\n');
16284   WorklistRemover DeadNodes(*this);
16285   if (IsLoad) {
16286     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16287     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16288   } else {
16289     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16290   }
16291 
16292   // Finally, since the node is now dead, remove it from the graph.
16293   deleteAndRecombine(N);
16294 
16295   // Replace the uses of Use with uses of the updated base value.
16296   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
16297                                 Result.getValue(IsLoad ? 1 : 0));
16298   deleteAndRecombine(Op);
16299   return true;
16300 }
16301 
16302 /// Return the base-pointer arithmetic from an indexed \p LD.
16303 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16304   ISD::MemIndexedMode AM = LD->getAddressingMode();
16305   assert(AM != ISD::UNINDEXED);
16306   SDValue BP = LD->getOperand(1);
16307   SDValue Inc = LD->getOperand(2);
16308 
16309   // Some backends use TargetConstants for load offsets, but don't expect
16310   // TargetConstants in general ADD nodes. We can convert these constants into
16311   // regular Constants (if the constant is not opaque).
16312   assert((Inc.getOpcode() != ISD::TargetConstant ||
16313           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16314          "Cannot split out indexing using opaque target constants");
16315   if (Inc.getOpcode() == ISD::TargetConstant) {
16316     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16317     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16318                           ConstInc->getValueType(0));
16319   }
16320 
16321   unsigned Opc =
16322       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16323   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
16324 }
16325 
16326 static inline ElementCount numVectorEltsOrZero(EVT T) {
16327   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
16328 }
16329 
16330 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16331   Val = ST->getValue();
16332   EVT STType = Val.getValueType();
16333   EVT STMemType = ST->getMemoryVT();
16334   if (STType == STMemType)
16335     return true;
16336   if (isTypeLegal(STMemType))
16337     return false; // fail.
16338   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16339       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16340     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
16341     return true;
16342   }
16343   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16344       STType.isInteger() && STMemType.isInteger()) {
16345     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
16346     return true;
16347   }
16348   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16349     Val = DAG.getBitcast(STMemType, Val);
16350     return true;
16351   }
16352   return false; // fail.
16353 }
16354 
16355 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16356   EVT LDMemType = LD->getMemoryVT();
16357   EVT LDType = LD->getValueType(0);
16358   assert(Val.getValueType() == LDMemType &&
16359          "Attempting to extend value of non-matching type");
16360   if (LDType == LDMemType)
16361     return true;
16362   if (LDMemType.isInteger() && LDType.isInteger()) {
16363     switch (LD->getExtensionType()) {
16364     case ISD::NON_EXTLOAD:
16365       Val = DAG.getBitcast(LDType, Val);
16366       return true;
16367     case ISD::EXTLOAD:
16368       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16369       return true;
16370     case ISD::SEXTLOAD:
16371       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16372       return true;
16373     case ISD::ZEXTLOAD:
16374       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16375       return true;
16376     }
16377   }
16378   return false;
16379 }
16380 
16381 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16382   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16383     return SDValue();
16384   SDValue Chain = LD->getOperand(0);
16385   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16386   // TODO: Relax this restriction for unordered atomics (see D66309)
16387   if (!ST || !ST->isSimple())
16388     return SDValue();
16389 
16390   EVT LDType = LD->getValueType(0);
16391   EVT LDMemType = LD->getMemoryVT();
16392   EVT STMemType = ST->getMemoryVT();
16393   EVT STType = ST->getValue().getValueType();
16394 
16395   // There are two cases to consider here:
16396   //  1. The store is fixed width and the load is scalable. In this case we
16397   //     don't know at compile time if the store completely envelops the load
16398   //     so we abandon the optimisation.
16399   //  2. The store is scalable and the load is fixed width. We could
16400   //     potentially support a limited number of cases here, but there has been
16401   //     no cost-benefit analysis to prove it's worth it.
16402   bool LdStScalable = LDMemType.isScalableVector();
16403   if (LdStScalable != STMemType.isScalableVector())
16404     return SDValue();
16405 
16406   // If we are dealing with scalable vectors on a big endian platform the
16407   // calculation of offsets below becomes trickier, since we do not know at
16408   // compile time the absolute size of the vector. Until we've done more
16409   // analysis on big-endian platforms it seems better to bail out for now.
16410   if (LdStScalable && DAG.getDataLayout().isBigEndian())
16411     return SDValue();
16412 
16413   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16414   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16415   int64_t Offset;
16416   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16417     return SDValue();
16418 
16419   // Normalize for endianness. After this, Offset=0 will denote that the least
16420   // significant bit in the loaded value maps to the least significant bit in
16421   // the stored value. With Offset=n (for n > 0) the loaded value starts at the
16422   // n:th least significant byte of the stored value.
16423   if (DAG.getDataLayout().isBigEndian())
16424     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16425               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16426                  8 -
16427              Offset;
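        // E.g. for a 4-byte store followed by a 1-byte load at the same
        // address, the matched Offset is 0; on a big-endian target the load
        // reads the most significant byte, i.e. byte (4 - 1) - 0 = 3 counting
        // from the least significant end.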
16428 
16429   // Check that the stored value covers all bits that are loaded.
16430   bool STCoversLD;
16431 
16432   TypeSize LdMemSize = LDMemType.getSizeInBits();
16433   TypeSize StMemSize = STMemType.getSizeInBits();
16434   if (LdStScalable)
16435     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16436   else
16437     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16438                                    StMemSize.getFixedSize());
16439 
16440   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16441     if (LD->isIndexed()) {
16442       // Cannot handle opaque target constants and we must respect the user's
16443       // request not to split indexes from loads.
16444       if (!canSplitIdx(LD))
16445         return SDValue();
16446       SDValue Idx = SplitIndexingFromLoad(LD);
16447       SDValue Ops[] = {Val, Idx, Chain};
16448       return CombineTo(LD, Ops, 3);
16449     }
16450     return CombineTo(LD, Val, Chain);
16451   };
16452 
16453   if (!STCoversLD)
16454     return SDValue();
16455 
16456   // Memory as copy space (potentially masked).
16457   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16458     // Simple case: Direct non-truncating forwarding
16459     if (LDType.getSizeInBits() == LdMemSize)
16460       return ReplaceLd(LD, ST->getValue(), Chain);
16461     // Can we model the truncate and extension with an and mask?
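          // E.g. an i16 truncating store of an i32 value followed by an i16
          // zextload back to i32 can forward as (and StoredVal, 0xFFFF).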
16462     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16463         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16464       // Mask to size of LDMemType
16465       auto Mask =
16466           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16467                                                StMemSize.getFixedSize()),
16468                           SDLoc(ST), STType);
16469       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16470       return ReplaceLd(LD, Val, Chain);
16471     }
16472   }
16473 
16474   // TODO: Deal with nonzero offset.
16475   if (LD->getBasePtr().isUndef() || Offset != 0)
16476     return SDValue();
16477   // Model necessary truncations / extensions.
16478   SDValue Val;
16479   // Truncate the value to the stored memory size.
16480   do {
16481     if (!getTruncatedStoreValue(ST, Val))
16482       continue;
16483     if (!isTypeLegal(LDMemType))
16484       continue;
16485     if (STMemType != LDMemType) {
16486       // TODO: Support vectors? This requires extract_subvector/bitcast.
16487       if (!STMemType.isVector() && !LDMemType.isVector() &&
16488           STMemType.isInteger() && LDMemType.isInteger())
16489         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16490       else
16491         continue;
16492     }
16493     if (!extendLoadedValueToExtension(LD, Val))
16494       continue;
16495     return ReplaceLd(LD, Val, Chain);
16496   } while (false);
16497 
16498   // On failure, cleanup dead nodes we may have created.
16499   if (Val->use_empty())
16500     deleteAndRecombine(Val.getNode());
16501   return SDValue();
16502 }
16503 
16504 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16505   LoadSDNode *LD  = cast<LoadSDNode>(N);
16506   SDValue Chain = LD->getChain();
16507   SDValue Ptr   = LD->getBasePtr();
16508 
16509   // If load is not volatile and there are no uses of the loaded value (and
16510   // the updated indexed value in case of indexed loads), change uses of the
16511   // chain value into uses of the chain input (i.e. delete the dead load).
16512   // TODO: Allow this for unordered atomics (see D66309)
16513   if (LD->isSimple()) {
16514     if (N->getValueType(1) == MVT::Other) {
16515       // Unindexed loads.
16516       if (!N->hasAnyUseOfValue(0)) {
16517         // It's not safe to use the two value CombineTo variant here. e.g.
16518         // v1, chain2 = load chain1, loc
16519         // v2, chain3 = load chain2, loc
16520         // v3         = add v2, c
16521         // Now we replace use of chain2 with chain1.  This makes the second load
16522         // isomorphic to the one we are deleting, and thus makes this load live.
16523         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16524                    dbgs() << "\nWith chain: "; Chain.dump(&DAG);
16525                    dbgs() << "\n");
16526         WorklistRemover DeadNodes(*this);
16527         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16528         AddUsersToWorklist(Chain.getNode());
16529         if (N->use_empty())
16530           deleteAndRecombine(N);
16531 
16532         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16533       }
16534     } else {
16535       // Indexed loads.
16536       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16537 
16538       // If this load has an opaque TargetConstant offset, then we cannot split
16539       // the indexing into an add/sub directly (that TargetConstant may not be
16540       // valid for a different type of node, and we cannot convert an opaque
16541       // target constant into a regular constant).
16542       bool CanSplitIdx = canSplitIdx(LD);
16543 
16544       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16545         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16546         SDValue Index;
16547         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16548           Index = SplitIndexingFromLoad(LD);
16549           // Try to fold the base pointer arithmetic into subsequent loads and
16550           // stores.
16551           AddUsersToWorklist(N);
16552         } else
16553           Index = DAG.getUNDEF(N->getValueType(1));
16554         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16555                    dbgs() << "\nWith: "; Undef.dump(&DAG);
16556                    dbgs() << " and 2 other values\n");
16557         WorklistRemover DeadNodes(*this);
16558         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16559         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16560         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16561         deleteAndRecombine(N);
16562         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16563       }
16564     }
16565   }
16566 
16567   // If this load is directly stored, replace the load value with the stored
16568   // value.
16569   if (auto V = ForwardStoreValueToDirectLoad(LD))
16570     return V;
16571 
16572   // Try to infer better alignment information than the load already has.
16573   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16574     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16575       if (*Alignment > LD->getAlign() &&
16576           isAligned(*Alignment, LD->getSrcValueOffset())) {
16577         SDValue NewLoad = DAG.getExtLoad(
16578             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16579             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16580             LD->getMemOperand()->getFlags(), LD->getAAInfo());
16581         // NewLoad will always be N as we are only refining the alignment
16582         assert(NewLoad.getNode() == N);
16583         (void)NewLoad;
16584       }
16585     }
16586   }
16587 
16588   if (LD->isUnindexed()) {
16589     // Walk up chain skipping non-aliasing memory nodes.
16590     SDValue BetterChain = FindBetterChain(LD, Chain);
16591 
16592     // If there is a better chain.
16593     if (Chain != BetterChain) {
16594       SDValue ReplLoad;
16595 
16596       // Replace the chain to avoid dependency.
16597       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16598         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16599                                BetterChain, Ptr, LD->getMemOperand());
16600       } else {
16601         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16602                                   LD->getValueType(0),
16603                                   BetterChain, Ptr, LD->getMemoryVT(),
16604                                   LD->getMemOperand());
16605       }
16606 
16607       // Create token factor to keep old chain connected.
16608       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16609                                   MVT::Other, Chain, ReplLoad.getValue(1));
16610 
16611       // Replace uses with load result and token factor
16612       return CombineTo(N, ReplLoad.getValue(0), Token);
16613     }
16614   }
16615 
16616   // Try transforming N to an indexed load.
16617   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16618     return SDValue(N, 0);
16619 
16620   // Try to slice up N to more direct loads if the slices are mapped to
16621   // different register banks or pairing can take place.
16622   if (SliceUpLoad(N))
16623     return SDValue(N, 0);
16624 
16625   return SDValue();
16626 }
16627 
16628 namespace {
16629 
16630 /// Helper structure used to slice a load in smaller loads.
16631 /// Basically a slice is obtained from the following sequence:
16632 /// Origin = load Ty1, Base
16633 /// Shift = srl Ty1 Origin, CstTy Amount
16634 /// Inst = trunc Shift to Ty2
16635 ///
16636 /// Then, it will be rewritten into:
16637 /// Slice = load SliceTy, Base + SliceOffset
16638 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16639 ///
16640 /// SliceTy is deduced from the number of bits that are actually used to
16641 /// build Inst.
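      ///
      /// E.g. (assuming little endian): Origin = load i32, Base; Shift = 16;
      /// Inst = trunc to i16 can be rewritten as Slice = load i16, Base + 2.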
16642 struct LoadedSlice {
16643   /// Helper structure used to compute the cost of a slice.
16644   struct Cost {
16645     /// Are we optimizing for code size.
16646     bool ForCodeSize = false;
16647 
16648     /// Various cost counters.
16649     unsigned Loads = 0;
16650     unsigned Truncates = 0;
16651     unsigned CrossRegisterBanksCopies = 0;
16652     unsigned ZExts = 0;
16653     unsigned Shift = 0;
16654 
16655     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16656 
16657     /// Get the cost of one isolated slice.
16658     Cost(const LoadedSlice &LS, bool ForCodeSize)
16659         : ForCodeSize(ForCodeSize), Loads(1) {
16660       EVT TruncType = LS.Inst->getValueType(0);
16661       EVT LoadedType = LS.getLoadedType();
16662       if (TruncType != LoadedType &&
16663           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16664         ZExts = 1;
16665     }
16666 
16667     /// Account for slicing gain in the current cost.
16668     /// Slicing provides a few gains, like removing a shift or a
16669     /// truncate. This method grows the cost of the original
16670     /// load with the gain from this slice.
16671     void addSliceGain(const LoadedSlice &LS) {
16672       // Each slice saves a truncate.
16673       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16674       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16675                               LS.Inst->getValueType(0)))
16676         ++Truncates;
16677       // If there is a shift amount, this slice gets rid of it.
16678       if (LS.Shift)
16679         ++Shift;
16680       // If this slice can merge a cross register bank copy, account for it.
16681       if (LS.canMergeExpensiveCrossRegisterBankCopy())
16682         ++CrossRegisterBanksCopies;
16683     }
16684 
16685     Cost &operator+=(const Cost &RHS) {
16686       Loads += RHS.Loads;
16687       Truncates += RHS.Truncates;
16688       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16689       ZExts += RHS.ZExts;
16690       Shift += RHS.Shift;
16691       return *this;
16692     }
16693 
16694     bool operator==(const Cost &RHS) const {
16695       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16696              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16697              ZExts == RHS.ZExts && Shift == RHS.Shift;
16698     }
16699 
16700     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16701 
16702     bool operator<(const Cost &RHS) const {
16703       // Assume cross register banks copies are as expensive as loads.
16704       // FIXME: Do we want some more target hooks?
16705       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16706       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16707       // Unless we are optimizing for code size, consider the
16708       // expensive operation first.
16709       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16710         return ExpensiveOpsLHS < ExpensiveOpsRHS;
16711       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16712              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16713     }
16714 
16715     bool operator>(const Cost &RHS) const { return RHS < *this; }
16716 
16717     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16718 
16719     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16720   };
16721 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
16724   SDNode *Inst;
16725 
16726   // The original load instruction.
16727   LoadSDNode *Origin;
16728 
16729   // The right shift amount in bits from the original load.
16730   unsigned Shift;
16731 
  // The DAG from which Origin came.
16733   // This is used to get some contextual information about legal types, etc.
16734   SelectionDAG *DAG;
16735 
16736   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16737               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16738       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16739 
  /// Get the bits used in a chunk of bits as large as the original load.
  /// \return Result is as wide as the original loaded type, with used bits
  ///         set to 1 and unused bits set to 0.
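  /// For example, with an i32 original load, an i16 truncate, and a shift
  /// amount of 16, the result is the 32-bit value 0xFFFF0000.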
16743   APInt getUsedBits() const {
16744     // Reproduce the trunc(lshr) sequence:
16745     // - Start from the truncated value.
16746     // - Zero extend to the desired bit width.
16747     // - Shift left.
16748     assert(Origin && "No original load to compare against.");
16749     unsigned BitWidth = Origin->getValueSizeInBits(0);
16750     assert(Inst && "This slice is not bound to an instruction");
16751     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16752            "Extracted slice is bigger than the whole type!");
16753     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16754     UsedBits.setAllBits();
16755     UsedBits = UsedBits.zext(BitWidth);
16756     UsedBits <<= Shift;
16757     return UsedBits;
16758   }
16759 
16760   /// Get the size of the slice to be loaded in bytes.
16761   unsigned getLoadedSize() const {
16762     unsigned SliceSize = getUsedBits().countPopulation();
16763     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16764     return SliceSize / 8;
16765   }
16766 
16767   /// Get the type that will be loaded for this slice.
16768   /// Note: This may not be the final type for the slice.
16769   EVT getLoadedType() const {
16770     assert(DAG && "Missing context");
16771     LLVMContext &Ctxt = *DAG->getContext();
16772     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16773   }
16774 
16775   /// Get the alignment of the load used for this slice.
16776   Align getAlign() const {
16777     Align Alignment = Origin->getAlign();
16778     uint64_t Offset = getOffsetFromBase();
16779     if (Offset != 0)
16780       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16781     return Alignment;
16782   }
16783 
16784   /// Check if this slice can be rewritten with legal operations.
16785   bool isLegal() const {
16786     // An invalid slice is not legal.
16787     if (!Origin || !Inst || !DAG)
16788       return false;
16789 
    // Offsets are for indexed loads only; we do not handle that.
16791     if (!Origin->getOffset().isUndef())
16792       return false;
16793 
16794     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16795 
16796     // Check that the type is legal.
16797     EVT SliceType = getLoadedType();
16798     if (!TLI.isTypeLegal(SliceType))
16799       return false;
16800 
16801     // Check that the load is legal for this type.
16802     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16803       return false;
16804 
16805     // Check that the offset can be computed.
16806     // 1. Check its type.
16807     EVT PtrType = Origin->getBasePtr().getValueType();
16808     if (PtrType == MVT::Untyped || PtrType.isExtended())
16809       return false;
16810 
16811     // 2. Check that it fits in the immediate.
16812     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16813       return false;
16814 
16815     // 3. Check that the computation is legal.
16816     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16817       return false;
16818 
16819     // Check that the zext is legal if it needs one.
16820     EVT TruncateType = Inst->getValueType(0);
16821     if (TruncateType != SliceType &&
16822         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
16823       return false;
16824 
16825     return true;
16826   }
16827 
16828   /// Get the offset in bytes of this slice in the original chunk of
16829   /// bits.
16830   /// \pre DAG != nullptr.
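  /// E.g., for an i32 original load with Shift == 16 and a 2-byte slice,
  /// the offset is 2 on a little-endian target and 0 on a big-endian one.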
16831   uint64_t getOffsetFromBase() const {
16832     assert(DAG && "Missing context.");
16833     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
16835     uint64_t Offset = Shift / 8;
16836     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
16837     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
16838            "The size of the original loaded type is not a multiple of a"
16839            " byte.");
16840     // If Offset is bigger than TySizeInBytes, it means we are loading all
16841     // zeros. This should have been optimized before in the process.
16842     assert(TySizeInBytes > Offset &&
16843            "Invalid shift amount for given loaded size");
16844     if (IsBigEndian)
16845       Offset = TySizeInBytes - Offset - getLoadedSize();
16846     return Offset;
16847   }
16848 
16849   /// Generate the sequence of instructions to load the slice
16850   /// represented by this object and redirect the uses of this slice to
16851   /// this new sequence of instructions.
16852   /// \pre this->Inst && this->Origin are valid Instructions and this
16853   /// object passed the legal check: LoadedSlice::isLegal returned true.
16854   /// \return The last instruction of the sequence used to load the slice.
16855   SDValue loadSlice() const {
16856     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16857     const SDValue &OldBaseAddr = Origin->getBasePtr();
16858     SDValue BaseAddr = OldBaseAddr;
16859     // Get the offset in that chunk of bytes w.r.t. the endianness.
16860     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16861     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16862     if (Offset) {
16863       // BaseAddr = BaseAddr + Offset.
16864       EVT ArithType = BaseAddr.getValueType();
16865       SDLoc DL(Origin);
16866       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16867                               DAG->getConstant(Offset, DL, ArithType));
16868     }
16869 
16870     // Create the type of the loaded slice according to its size.
16871     EVT SliceType = getLoadedType();
16872 
16873     // Create the load for the slice.
16874     SDValue LastInst =
16875         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16876                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16877                      Origin->getMemOperand()->getFlags());
16878     // If the final type is not the same as the loaded type, this means that
16879     // we have to pad with zero. Create a zero extend for that.
16880     EVT FinalType = Inst->getValueType(0);
16881     if (SliceType != FinalType)
16882       LastInst =
16883           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16884     return LastInst;
16885   }
16886 
16887   /// Check if this slice can be merged with an expensive cross register
16888   /// bank copy. E.g.,
16889   /// i = load i32
16890   /// f = bitcast i32 i to float
16891   bool canMergeExpensiveCrossRegisterBankCopy() const {
16892     if (!Inst || !Inst->hasOneUse())
16893       return false;
16894     SDNode *Use = *Inst->use_begin();
16895     if (Use->getOpcode() != ISD::BITCAST)
16896       return false;
16897     assert(DAG && "Missing context");
16898     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16899     EVT ResVT = Use->getValueType(0);
16900     const TargetRegisterClass *ResRC =
16901         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16902     const TargetRegisterClass *ArgRC =
16903         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16904                            Use->getOperand(0)->isDivergent());
16905     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16906       return false;
16907 
16908     // At this point, we know that we perform a cross-register-bank copy.
16909     // Check if it is expensive.
16910     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless the register classes do not
    // explicitly share a common subclass.
16913     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16914       return false;
16915 
16916     // Check if it will be merged with the load.
16917     // 1. Check the alignment / fast memory access constraint.
16918     bool IsFast = false;
16919     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
16920                                 Origin->getAddressSpace(), getAlign(),
16921                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
16922         !IsFast)
16923       return false;
16924 
16925     // 2. Check that the load is a legal operation for that type.
16926     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16927       return false;
16928 
16929     // 3. Check that we do not have a zext in the way.
16930     if (Inst->getValueType(0) != getLoadedType())
16931       return false;
16932 
16933     return true;
16934   }
16935 };
16936 
16937 } // end anonymous namespace
16938 
16939 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16940 /// \p UsedBits looks like 0..0 1..1 0..0.
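/// E.g., 0x00FF0000 is dense, whereas 0x00FF00FF is not.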
16941 static bool areUsedBitsDense(const APInt &UsedBits) {
16942   // If all the bits are one, this is dense!
16943   if (UsedBits.isAllOnes())
16944     return true;
16945 
16946   // Get rid of the unused bits on the right.
16947   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16948   // Get rid of the unused bits on the left.
16949   if (NarrowedUsedBits.countLeadingZeros())
16950     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16951   // Check that the chunk of bits is completely used.
16952   return NarrowedUsedBits.isAllOnes();
16953 }
16954 
16955 /// Check whether or not \p First and \p Second are next to each other
16956 /// in memory. This means that there is no hole between the bits loaded
16957 /// by \p First and the bits loaded by \p Second.
16958 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16959                                      const LoadedSlice &Second) {
16960   assert(First.Origin == Second.Origin && First.Origin &&
16961          "Unable to match different memory origins.");
16962   APInt UsedBits = First.getUsedBits();
16963   assert((UsedBits & Second.getUsedBits()) == 0 &&
16964          "Slices are not supposed to overlap.");
16965   UsedBits |= Second.getUsedBits();
16966   return areUsedBitsDense(UsedBits);
16967 }
16968 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
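/// E.g., if the target supports paired loads for the slices' type and two
/// slices are adjacent in memory, they are counted as a single load in
/// \p GlobalLSCost.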
16973 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16974                                  LoadedSlice::Cost &GlobalLSCost) {
16975   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
16977   if (NumberOfSlices < 2)
16978     return;
16979 
16980   // Sort the slices so that elements that are likely to be next to each
16981   // other in memory are next to each other in the list.
16982   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16983     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16984     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16985   });
16986   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
16989   const LoadedSlice *First = nullptr;
16990   const LoadedSlice *Second = nullptr;
16991   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16992                 // Set the beginning of the pair.
16993                                                            First = Second) {
16994     Second = &LoadedSlices[CurrSlice];
16995 
16996     // If First is NULL, it means we start a new pair.
16997     // Get to the next slice.
16998     if (!First)
16999       continue;
17000 
17001     EVT LoadedType = First->getLoadedType();
17002 
17003     // If the types of the slices are different, we cannot pair them.
17004     if (LoadedType != Second->getLoadedType())
17005       continue;
17006 
17007     // Check if the target supplies paired loads for this type.
17008     Align RequiredAlignment;
17009     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
17011       Second = nullptr;
17012       continue;
17013     }
17014     // Check if we meet the alignment requirement.
17015     if (First->getAlign() < RequiredAlignment)
17016       continue;
17017 
17018     // Check that both loads are next to each other in memory.
17019     if (!areSlicesNextToEachOther(*First, *Second))
17020       continue;
17021 
17022     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
17023     --GlobalLSCost.Loads;
17024     // Move to the next pair.
17025     Second = nullptr;
17026   }
17027 }
17028 
17029 /// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
17031 /// involved slices (1) which are (2) next to each other in memory, and
17032 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
17033 ///
17034 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
17035 /// the elements themselves.
17036 ///
/// FIXME: When the cost model is mature enough, we can relax
17038 /// constraints (1) and (2).
17039 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17040                                 const APInt &UsedBits, bool ForCodeSize) {
17041   unsigned NumberOfSlices = LoadedSlices.size();
17042   if (StressLoadSlicing)
17043     return NumberOfSlices > 1;
17044 
17045   // Check (1).
17046   if (NumberOfSlices != 2)
17047     return false;
17048 
17049   // Check (2).
17050   if (!areUsedBitsDense(UsedBits))
17051     return false;
17052 
17053   // Check (3).
17054   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
17055   // The original code has one big load.
17056   OrigCost.Loads = 1;
17057   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
17058     const LoadedSlice &LS = LoadedSlices[CurrSlice];
17059     // Accumulate the cost of all the slices.
17060     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
17061     GlobalSlicingCost += SliceCost;
17062 
17063     // Account as cost in the original configuration the gain obtained
17064     // with the current slices.
17065     OrigCost.addSliceGain(LS);
17066   }
17067 
17068   // If the target supports paired load, adjust the cost accordingly.
17069   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
17070   return OrigCost > GlobalSlicingCost;
17071 }
17072 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
17075 ///
17076 /// This sort of thing is introduced by SROA.
17077 /// This slicing takes care not to insert overlapping loads.
17078 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
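/// E.g., a load only used as two truncated chunks:
///   Ld = load i64, Base
///   Lo = trunc Ld to i32
///   Hi = trunc (srl Ld, 32) to i32
/// may be sliced, on a little-endian target, into:
///   Lo = load i32, Base
///   Hi = load i32, Base + 4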
17079 bool DAGCombiner::SliceUpLoad(SDNode *N) {
17080   if (Level < AfterLegalizeDAG)
17081     return false;
17082 
17083   LoadSDNode *LD = cast<LoadSDNode>(N);
17084   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
17085       !LD->getValueType(0).isInteger())
17086     return false;
17087 
17088   // The algorithm to split up a load of a scalable vector into individual
17089   // elements currently requires knowing the length of the loaded type,
17090   // so will need adjusting to work on scalable vectors.
17091   if (LD->getValueType(0).isScalableVector())
17092     return false;
17093 
17094   // Keep track of already used bits to detect overlapping values.
17095   // In that case, we will just abort the transformation.
17096   APInt UsedBits(LD->getValueSizeInBits(0), 0);
17097 
17098   SmallVector<LoadedSlice, 4> LoadedSlices;
17099 
17100   // Check if this load is used as several smaller chunks of bits.
17101   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
17102   // of computation for each trunc.
17103   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
17104        UI != UIEnd; ++UI) {
17105     // Skip the uses of the chain.
17106     if (UI.getUse().getResNo() != 0)
17107       continue;
17108 
17109     SDNode *User = *UI;
17110     unsigned Shift = 0;
17111 
17112     // Check if this is a trunc(lshr).
17113     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
17114         isa<ConstantSDNode>(User->getOperand(1))) {
17115       Shift = User->getConstantOperandVal(1);
17116       User = *User->use_begin();
17117     }
17118 
    // At this point, User is a truncate iff we encountered trunc or
    // trunc(lshr).
17121     if (User->getOpcode() != ISD::TRUNCATE)
17122       return false;
17123 
    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if the shift amount is not a multiple of 8 bits, the slice
    // would straddle byte boundaries. We do not support that.
17128     unsigned Width = User->getValueSizeInBits(0);
17129     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
17130       return false;
17131 
17132     // Build the slice for this chain of computations.
17133     LoadedSlice LS(User, LD, Shift, &DAG);
17134     APInt CurrentUsedBits = LS.getUsedBits();
17135 
17136     // Check if this slice overlaps with another.
17137     if ((CurrentUsedBits & UsedBits) != 0)
17138       return false;
17139     // Update the bits used globally.
17140     UsedBits |= CurrentUsedBits;
17141 
17142     // Check if the new slice would be legal.
17143     if (!LS.isLegal())
17144       return false;
17145 
17146     // Record the slice.
17147     LoadedSlices.push_back(LS);
17148   }
17149 
17150   // Abort slicing if it does not seem to be profitable.
17151   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
17152     return false;
17153 
17154   ++SlicedLoads;
17155 
17156   // Rewrite each chain to use an independent load.
17157   // By construction, each chain can be represented by a unique load.
17158 
17159   // Prepare the argument for the new token factor for all the slices.
17160   SmallVector<SDValue, 8> ArgChains;
17161   for (const LoadedSlice &LS : LoadedSlices) {
17162     SDValue SliceInst = LS.loadSlice();
17163     CombineTo(LS.Inst, SliceInst, true);
17164     if (SliceInst.getOpcode() != ISD::LOAD)
17165       SliceInst = SliceInst.getOperand(0);
17166     assert(SliceInst->getOpcode() == ISD::LOAD &&
17167            "It takes more than a zext to get to the loaded slice!!");
17168     ArgChains.push_back(SliceInst.getValue(1));
17169   }
17170 
17171   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
17172                               ArgChains);
17173   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
17174   AddToWorklist(Chain.getNode());
17175   return true;
17176 }
17177 
/// Check to see if V is (and (load ptr), imm), where the load has specific
/// bytes cleared out. If so, return the byte size being masked out and the
/// shift amount.
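/// E.g., for V = (and (load i32 ptr), 0xFFFF00FF), the byte at offset 1 is
/// cleared, so this returns {1 /* byte */, 1 /* byte shift */}.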
17181 static std::pair<unsigned, unsigned>
17182 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
17183   std::pair<unsigned, unsigned> Result(0, 0);
17184 
17185   // Check for the structure we're looking for.
17186   if (V->getOpcode() != ISD::AND ||
17187       !isa<ConstantSDNode>(V->getOperand(1)) ||
17188       !ISD::isNormalLoad(V->getOperand(0).getNode()))
17189     return Result;
17190 
17191   // Check the chain and pointer.
17192   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
17193   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
17194 
17195   // This only handles simple types.
17196   if (V.getValueType() != MVT::i16 &&
17197       V.getValueType() != MVT::i32 &&
17198       V.getValueType() != MVT::i64)
17199     return Result;
17200 
17201   // Check the constant mask.  Invert it so that the bits being masked out are
17202   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
17203   // follow the sign bit for uniformity.
17204   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
17205   unsigned NotMaskLZ = countLeadingZeros(NotMask);
17206   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
17207   unsigned NotMaskTZ = countTrailingZeros(NotMask);
17208   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
17209   if (NotMaskLZ == 64) return Result;  // All zero mask.
17210 
17211   // See if we have a continuous run of bits.  If so, we have 0*1+0*
17212   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
17213     return Result;
17214 
17215   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
17216   if (V.getValueType() != MVT::i64 && NotMaskLZ)
17217     NotMaskLZ -= 64-V.getValueSizeInBits();
17218 
17219   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
17220   switch (MaskedBytes) {
17221   case 1:
17222   case 2:
17223   case 4: break;
17224   default: return Result; // All one mask, or 5-byte mask.
17225   }
17226 
  // Verify that the masked region starts at a byte offset that is a multiple
  // of the mask width, so that the access is aligned the same as the access
  // width.
17229   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
17230 
  // For narrowing to be valid, the load must be the memory operation
  // immediately preceding the store.
17233   if (LD == Chain.getNode())
17234     ; // ok.
17235   else if (Chain->getOpcode() == ISD::TokenFactor &&
17236            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
17238     if (!LD->isOperandOf(Chain.getNode()))
17239       return Result;
17240   } else
17241     return Result; // Fail.
17242 
17243   Result.first = MaskedBytes;
17244   Result.second = NotMaskTZ/8;
17245   return Result;
17246 }
17247 
17248 /// Check to see if IVal is something that provides a value as specified by
17249 /// MaskInfo. If so, replace the specified store with a narrower store of
17250 /// truncated IVal.
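/// E.g., with MaskInfo == {1, 1} on a little-endian target, the store may be
/// replaced by an i8 store of (trunc (srl IVal, 8)) at byte offset 1,
/// provided IVal is known to be zero outside that byte.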
17251 static SDValue
17252 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17253                                 SDValue IVal, StoreSDNode *St,
17254                                 DAGCombiner *DC) {
17255   unsigned NumBytes = MaskInfo.first;
17256   unsigned ByteShift = MaskInfo.second;
17257   SelectionDAG &DAG = DC->getDAG();
17258 
17259   // Check to see if IVal is all zeros in the part being masked in by the 'or'
17260   // that uses this.  If not, this is not a replacement.
17261   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17262                                   ByteShift*8, (ByteShift+NumBytes)*8);
17263   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17264 
17265   // Check that it is legal on the target to do this.  It is legal if the new
17266   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17267   // legalization (and the target doesn't explicitly think this is a bad idea).
17268   MVT VT = MVT::getIntegerVT(NumBytes * 8);
17269   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17270   if (!DC->isTypeLegal(VT))
17271     return SDValue();
17272   if (St->getMemOperand() &&
17273       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17274                               *St->getMemOperand()))
17275     return SDValue();
17276 
17277   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
17278   // shifted by ByteShift and truncated down to NumBytes.
17279   if (ByteShift) {
17280     SDLoc DL(IVal);
17281     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17282                        DAG.getConstant(ByteShift*8, DL,
17283                                     DC->getShiftAmountTy(IVal.getValueType())));
17284   }
17285 
17286   // Figure out the offset for the store and the alignment of the access.
17287   unsigned StOffset;
17288   if (DAG.getDataLayout().isLittleEndian())
17289     StOffset = ByteShift;
17290   else
17291     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17292 
17293   SDValue Ptr = St->getBasePtr();
17294   if (StOffset) {
17295     SDLoc DL(IVal);
17296     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
17297   }
17298 
17299   // Truncate down to the new size.
17300   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17301 
17302   ++OpsNarrowed;
17303   return DAG
17304       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17305                 St->getPointerInfo().getWithOffset(StOffset),
17306                 St->getOriginalAlign());
17307 }
17308 
/// Look for a load / op / store sequence where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' only touches some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
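/// E.g., on a little-endian target where i8 is legal:
///   (store (or (load i32 P), 0x00550000), P)
/// may be narrowed to:
///   (store (or (load i8 P+2), 0x55), P+2)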
17313 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17314   StoreSDNode *ST  = cast<StoreSDNode>(N);
17315   if (!ST->isSimple())
17316     return SDValue();
17317 
17318   SDValue Chain = ST->getChain();
17319   SDValue Value = ST->getValue();
17320   SDValue Ptr   = ST->getBasePtr();
17321   EVT VT = Value.getValueType();
17322 
17323   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
17324     return SDValue();
17325 
17326   unsigned Opc = Value.getOpcode();
17327 
  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load / or / store sequence with a single (narrower) store, which makes
  // the load dead.
17333   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
17334     std::pair<unsigned, unsigned> MaskedLoad;
17335     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17336     if (MaskedLoad.first)
17337       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17338                                                   Value.getOperand(1), ST,this))
17339         return NewST;
17340 
17341     // Or is commutative, so try swapping X and Y.
17342     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17343     if (MaskedLoad.first)
17344       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17345                                                   Value.getOperand(0), ST,this))
17346         return NewST;
17347   }
17348 
17349   if (!EnableReduceLoadOpStoreWidth)
17350     return SDValue();
17351 
17352   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17353       Value.getOperand(1).getOpcode() != ISD::Constant)
17354     return SDValue();
17355 
17356   SDValue N0 = Value.getOperand(0);
17357   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17358       Chain == SDValue(N0.getNode(), 1)) {
17359     LoadSDNode *LD = cast<LoadSDNode>(N0);
17360     if (LD->getBasePtr() != Ptr ||
17361         LD->getPointerInfo().getAddrSpace() !=
17362         ST->getPointerInfo().getAddrSpace())
17363       return SDValue();
17364 
    // Find the type to which to narrow the load / op / store.
17366     SDValue N1 = Value.getOperand(1);
17367     unsigned BitWidth = N1.getValueSizeInBits();
17368     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17369     if (Opc == ISD::AND)
17370       Imm ^= APInt::getAllOnes(BitWidth);
17371     if (Imm == 0 || Imm.isAllOnes())
17372       return SDValue();
17373     unsigned ShAmt = Imm.countTrailingZeros();
17374     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17375     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17376     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17377     // The narrowing should be profitable, the load/store operation should be
17378     // legal (or custom) and the store size should be equal to the NewVT width.
17379     while (NewBW < BitWidth &&
17380            (NewVT.getStoreSizeInBits() != NewBW ||
17381             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17382             !TLI.isNarrowingProfitable(VT, NewVT))) {
17383       NewBW = NextPowerOf2(NewBW);
17384       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17385     }
17386     if (NewBW >= BitWidth)
17387       return SDValue();
17388 
    // If the lowest changed bit does not start at a boundary of the new
    // type's bitwidth, start at the previous boundary.
17391     if (ShAmt % NewBW)
17392       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17393     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17394                                    std::min(BitWidth, ShAmt + NewBW));
17395     if ((Imm & Mask) == Imm) {
17396       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17397       if (Opc == ISD::AND)
17398         NewImm ^= APInt::getAllOnes(NewBW);
17399       uint64_t PtrOff = ShAmt / 8;
17400       // For big endian targets, we need to adjust the offset to the pointer to
17401       // load the correct bytes.
17402       if (DAG.getDataLayout().isBigEndian())
17403         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17404 
17405       bool IsFast = false;
17406       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17407       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17408                                   LD->getAddressSpace(), NewAlign,
17409                                   LD->getMemOperand()->getFlags(), &IsFast) ||
17410           !IsFast)
17411         return SDValue();
17412 
17413       SDValue NewPtr =
17414           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17415       SDValue NewLD =
17416           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17417                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17418                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
17419       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17420                                    DAG.getConstant(NewImm, SDLoc(Value),
17421                                                    NewVT));
17422       SDValue NewST =
17423           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17424                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17425 
17426       AddToWorklist(NewPtr.getNode());
17427       AddToWorklist(NewLD.getNode());
17428       AddToWorklist(NewVal.getNode());
17429       WorklistRemover DeadNodes(*this);
17430       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17431       ++OpsNarrowed;
17432       return NewST;
17433     }
17434   }
17435 
17436   return SDValue();
17437 }
17438 
17439 /// For a given floating point load / store pair, if the load value isn't used
17440 /// by any other operations, then consider transforming the pair to integer
17441 /// load / store operations if the target deems the transformation profitable.
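/// E.g., "Val = load f32, Ptr1; store f32 Val, Ptr2" may become
/// "IVal = load i32, Ptr1; store i32 IVal, Ptr2" when the target considers
/// the integer forms cheaper.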
17442 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17443   StoreSDNode *ST  = cast<StoreSDNode>(N);
17444   SDValue Value = ST->getValue();
17445   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17446       Value.hasOneUse()) {
17447     LoadSDNode *LD = cast<LoadSDNode>(Value);
17448     EVT VT = LD->getMemoryVT();
17449     if (!VT.isFloatingPoint() ||
17450         VT != ST->getMemoryVT() ||
17451         LD->isNonTemporal() ||
17452         ST->isNonTemporal() ||
17453         LD->getPointerInfo().getAddrSpace() != 0 ||
17454         ST->getPointerInfo().getAddrSpace() != 0)
17455       return SDValue();
17456 
17457     TypeSize VTSize = VT.getSizeInBits();
17458 
17459     // We don't know the size of scalable types at compile time so we cannot
17460     // create an integer of the equivalent size.
17461     if (VTSize.isScalable())
17462       return SDValue();
17463 
17464     bool FastLD = false, FastST = false;
17465     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17466     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17467         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17468         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17469         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17470         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17471                                 *LD->getMemOperand(), &FastLD) ||
17472         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17473                                 *ST->getMemOperand(), &FastST) ||
17474         !FastLD || !FastST)
17475       return SDValue();
17476 
17477     SDValue NewLD =
17478         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17479                     LD->getPointerInfo(), LD->getAlign());
17480 
17481     SDValue NewST =
17482         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17483                      ST->getPointerInfo(), ST->getAlign());
17484 
17485     AddToWorklist(NewLD.getNode());
17486     AddToWorklist(NewST.getNode());
17487     WorklistRemover DeadNodes(*this);
17488     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17489     ++LdStFP2Int;
17490     return NewST;
17491   }
17492 
17493   return SDValue();
17494 }
17495 
17496 // This is a helper function for visitMUL to check the profitability
17497 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17498 // MulNode is the original multiply, AddNode is (add x, c1),
17499 // and ConstNode is c2.
17500 //
17501 // If the (add x, c1) has multiple uses, we could increase
17502 // the number of adds if we make this transformation.
17503 // It would only be worth doing this if we can remove a
17504 // multiply in the process. Check for that here.
17505 // To illustrate:
17506 //     (A + c1) * c3
17507 //     (A + c2) * c3
17508 // We're checking for cases where we have common "c3 * A" expressions.
17509 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
17510                                               SDValue ConstNode) {
17511   APInt Val;
17512 
17513   // If the add only has one use, and the target thinks the folding is
17514   // profitable or does not lead to worse code, this would be OK to do.
17515   if (AddNode->hasOneUse() &&
17516       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17517     return true;
17518 
17519   // Walk all the users of the constant with which we're multiplying.
17520   for (SDNode *Use : ConstNode->uses()) {
17521     if (Use == MulNode) // This use is the one we're on right now. Skip it.
17522       continue;
17523 
17524     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17525       SDNode *OtherOp;
17526       SDNode *MulVar = AddNode.getOperand(0).getNode();
17527 
17528       // OtherOp is what we're multiplying against the constant.
17529       if (Use->getOperand(0) == ConstNode)
17530         OtherOp = Use->getOperand(1).getNode();
17531       else
17532         OtherOp = Use->getOperand(0).getNode();
17533 
17534       // Check to see if multiply is with the same operand of our "add".
17535       //
17536       //     ConstNode  = CONST
17537       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
17538       //     ...
17539       //     AddNode  = (A + c1)  <-- MulVar is A.
17540       //         = AddNode * ConstNode   <-- current visiting instruction.
17541       //
17542       // If we make this transformation, we will have a common
17543       // multiply (ConstNode * A) that we can save.
17544       if (OtherOp == MulVar)
17545         return true;
17546 
17547       // Now check to see if a future expansion will give us a common
17548       // multiply.
17549       //
17550       //     ConstNode  = CONST
17551       //     AddNode    = (A + c1)
17552       //     ...   = AddNode * ConstNode <-- current visiting instruction.
17553       //     ...
17554       //     OtherOp = (A + c2)
17555       //     Use     = OtherOp * ConstNode <-- visiting Use.
17556       //
17557       // If we make this transformation, we will have a common
17558       // multiply (CONST * A) after we also do the same transformation
      // to the "Use" instruction.
17560       if (OtherOp->getOpcode() == ISD::ADD &&
17561           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17562           OtherOp->getOperand(0).getNode() == MulVar)
17563         return true;
17564     }
17565   }
17566 
17567   // Didn't find a case where this would be profitable.
17568   return false;
17569 }
17570 
17571 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17572                                          unsigned NumStores) {
17573   SmallVector<SDValue, 8> Chains;
17574   SmallPtrSet<const SDNode *, 8> Visited;
17575   SDLoc StoreDL(StoreNodes[0].MemNode);
17576 
17577   for (unsigned i = 0; i < NumStores; ++i) {
17578     Visited.insert(StoreNodes[i].MemNode);
17579   }
17580 
  // Don't include nodes that are children of other candidates or repeated
  // nodes.
17582   for (unsigned i = 0; i < NumStores; ++i) {
17583     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17584       Chains.push_back(StoreNodes[i].MemNode->getChain());
17585   }
17586 
17587   assert(Chains.size() > 0 && "Chain should have generated a chain");
17588   return DAG.getTokenFactor(StoreDL, Chains);
17589 }
17590 
17591 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17592     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17593     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17594   // Make sure we have something to merge.
17595   if (NumStores < 2)
17596     return false;
17597 
17598   assert((!UseTrunc || !UseVector) &&
17599          "This optimization cannot emit a vector truncating store");
17600 
  // Take the debug location from the first store node.
17602   SDLoc DL(StoreNodes[0].MemNode);
17603 
17604   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17605   unsigned SizeInBits = NumStores * ElementSizeBits;
17606   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17607 
17608   Optional<MachineMemOperand::Flags> Flags;
17609   AAMDNodes AAInfo;
17610   for (unsigned I = 0; I != NumStores; ++I) {
17611     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17612     if (!Flags) {
17613       Flags = St->getMemOperand()->getFlags();
17614       AAInfo = St->getAAInfo();
17615       continue;
17616     }
17617     // Skip merging if there's an inconsistent flag.
17618     if (Flags != St->getMemOperand()->getFlags())
17619       return false;
17620     // Concatenate AA metadata.
17621     AAInfo = AAInfo.concat(St->getAAInfo());
17622   }
17623 
17624   EVT StoreTy;
17625   if (UseVector) {
17626     unsigned Elts = NumStores * NumMemElts;
17627     // Get the type for the merged vector store.
17628     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17629   } else
17630     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17631 
17632   SDValue StoredVal;
17633   if (UseVector) {
17634     if (IsConstantSrc) {
17635       SmallVector<SDValue, 8> BuildVector;
17636       for (unsigned I = 0; I != NumStores; ++I) {
17637         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17638         SDValue Val = St->getValue();
17639         // If constant is of the wrong type, convert it now.
17640         if (MemVT != Val.getValueType()) {
17641           Val = peekThroughBitcasts(Val);
17642           // Deal with constants of wrong size.
17643           if (ElementSizeBits != Val.getValueSizeInBits()) {
17644             EVT IntMemVT =
17645                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17646             if (isa<ConstantFPSDNode>(Val)) {
17647               // Not clear how to truncate FP values.
17648               return false;
17649             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
17650               Val = DAG.getConstant(C->getAPIntValue()
17651                                         .zextOrTrunc(Val.getValueSizeInBits())
17652                                         .zextOrTrunc(ElementSizeBits),
17653                                     SDLoc(C), IntMemVT);
17654           }
          // Bitcast the correctly sized value to the expected type.
17656           Val = DAG.getBitcast(MemVT, Val);
17657         }
17658         BuildVector.push_back(Val);
17659       }
17660       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17661                                                : ISD::BUILD_VECTOR,
17662                               DL, StoreTy, BuildVector);
17663     } else {
17664       SmallVector<SDValue, 8> Ops;
17665       for (unsigned i = 0; i < NumStores; ++i) {
17666         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17667         SDValue Val = peekThroughBitcasts(St->getValue());
17668         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17669         // type MemVT. If the underlying value is not the correct
17670         // type, but it is an extraction of an appropriate vector we
17671         // can recast Val to be of the correct type. This may require
17672         // converting between EXTRACT_VECTOR_ELT and
17673         // EXTRACT_SUBVECTOR.
17674         if ((MemVT != Val.getValueType()) &&
17675             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17676              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17677           EVT MemVTScalarTy = MemVT.getScalarType();
17678           // We may need to add a bitcast here to get types to line up.
17679           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17680             Val = DAG.getBitcast(MemVT, Val);
17681           } else {
17682             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17683                                             : ISD::EXTRACT_VECTOR_ELT;
17684             SDValue Vec = Val.getOperand(0);
17685             SDValue Idx = Val.getOperand(1);
17686             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17687           }
17688         }
17689         Ops.push_back(Val);
17690       }
17691 
17692       // Build the extracted vector elements back into a vector.
17693       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17694                                                : ISD::BUILD_VECTOR,
17695                               DL, StoreTy, Ops);
17696     }
17697   } else {
17698     // We should always use a vector store when merging extracted vector
17699     // elements, so this path implies a store of constants.
17700     assert(IsConstantSrc && "Merged vector elements should use vector store");
17701 
17702     APInt StoreInt(SizeInBits, 0);
17703 
17704     // Construct a single integer constant which is made of the smaller
17705     // constant inputs.
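    // E.g., merging two i16 stores of 0x1234 (lower address) and 0x5678 on a
    // little-endian target yields the single i32 constant 0x56781234.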
17706     bool IsLE = DAG.getDataLayout().isLittleEndian();
17707     for (unsigned i = 0; i < NumStores; ++i) {
17708       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17709       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17710 
17711       SDValue Val = St->getValue();
17712       Val = peekThroughBitcasts(Val);
17713       StoreInt <<= ElementSizeBits;
17714       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17715         StoreInt |= C->getAPIntValue()
17716                         .zextOrTrunc(ElementSizeBits)
17717                         .zextOrTrunc(SizeInBits);
17718       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17719         StoreInt |= C->getValueAPF()
17720                         .bitcastToAPInt()
17721                         .zextOrTrunc(ElementSizeBits)
17722                         .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary, give up for now.
17724         if (MemVT.getSizeInBits() != ElementSizeBits)
17725           return false;
17726       } else {
17727         llvm_unreachable("Invalid constant element type");
17728       }
17729     }
17730 
17731     // Create the new Load and Store operations.
17732     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17733   }
17734 
17735   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17736   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17737 
  // Make sure we use a truncating store if that is necessary for legality.
17739   SDValue NewStore;
17740   if (!UseTrunc) {
17741     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17742                             FirstInChain->getPointerInfo(),
17743                             FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17744   } else { // Must be realized as a trunc store
17745     EVT LegalizedStoredValTy =
17746         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17747     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17748     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17749     SDValue ExtendedStoreVal =
17750         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17751                         LegalizedStoredValTy);
17752     NewStore = DAG.getTruncStore(
17753         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17754         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17755         FirstInChain->getAlign(), Flags.getValue(), AAInfo);
17756   }
17757 
17758   // Replace all merged stores with the new store.
17759   for (unsigned i = 0; i < NumStores; ++i)
17760     CombineTo(StoreNodes[i].MemNode, NewStore);
17761 
17762   AddToWorklist(NewChain.getNode());
17763   return true;
17764 }
17765 
17766 void DAGCombiner::getStoreMergeCandidates(
17767     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17768     SDNode *&RootNode) {
17769   // This holds the base pointer, index, and the offset in bytes from the base
17770   // pointer. We must have a base and an offset. Do not handle stores to undef
17771   // base pointers.
17772   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17773   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17774     return;
17775 
17776   SDValue Val = peekThroughBitcasts(St->getValue());
17777   StoreSource StoreSrc = getStoreSource(Val);
17778   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17779 
17780   // Match on loadbaseptr if relevant.
17781   EVT MemVT = St->getMemoryVT();
17782   BaseIndexOffset LBasePtr;
17783   EVT LoadVT;
17784   if (StoreSrc == StoreSource::Load) {
17785     auto *Ld = cast<LoadSDNode>(Val);
17786     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17787     LoadVT = Ld->getMemoryVT();
17788     // Load and store should be the same type.
17789     if (MemVT != LoadVT)
17790       return;
17791     // Loads must only have one use.
17792     if (!Ld->hasNUsesOfValue(1, 0))
17793       return;
17794     // The memory operands must not be volatile/indexed/atomic.
17795     // TODO: May be able to relax for unordered atomics (see D66309)
17796     if (!Ld->isSimple() || Ld->isIndexed())
17797       return;
17798   }
17799   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17800                             int64_t &Offset) -> bool {
17801     // The memory operands must not be volatile/indexed/atomic.
17802     // TODO: May be able to relax for unordered atomics (see D66309)
17803     if (!Other->isSimple() || Other->isIndexed())
17804       return false;
17805     // Don't mix temporal stores with non-temporal stores.
17806     if (St->isNonTemporal() != Other->isNonTemporal())
17807       return false;
17808     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
17809     // Allow merging constants of different types as integers.
17810     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
17811                                            : Other->getMemoryVT() != MemVT;
17812     switch (StoreSrc) {
17813     case StoreSource::Load: {
17814       if (NoTypeMatch)
17815         return false;
17816       // The Load's Base Ptr must also match.
17817       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
17818       if (!OtherLd)
17819         return false;
17820       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
17821       if (LoadVT != OtherLd->getMemoryVT())
17822         return false;
17823       // Loads must only have one use.
17824       if (!OtherLd->hasNUsesOfValue(1, 0))
17825         return false;
17826       // The memory operands must not be volatile/indexed/atomic.
17827       // TODO: May be able to relax for unordered atomics (see D66309)
17828       if (!OtherLd->isSimple() || OtherLd->isIndexed())
17829         return false;
17830       // Don't mix temporal loads with non-temporal loads.
17831       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
17832         return false;
17833       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
17834         return false;
17835       break;
17836     }
17837     case StoreSource::Constant:
17838       if (NoTypeMatch)
17839         return false;
17840       if (!isIntOrFPConstant(OtherBC))
17841         return false;
17842       break;
17843     case StoreSource::Extract:
17844       // Do not merge truncated stores here.
17845       if (Other->isTruncatingStore())
17846         return false;
17847       if (!MemVT.bitsEq(OtherBC.getValueType()))
17848         return false;
17849       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
17850           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17851         return false;
17852       break;
17853     default:
17854       llvm_unreachable("Unhandled store source for merging");
17855     }
17856     Ptr = BaseIndexOffset::match(Other, DAG);
17857     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
17858   };
17859 
  // Check if the pair of StoreNode and the RootNode already bailed out many
  // times (over the limit) in the dependence check.
17862   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
17863                                         SDNode *RootNode) -> bool {
17864     auto RootCount = StoreRootCountMap.find(StoreNode);
17865     return RootCount != StoreRootCountMap.end() &&
17866            RootCount->second.first == RootNode &&
17867            RootCount->second.second > StoreMergeDependenceLimit;
17868   };
17869 
17870   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
17871     // This must be a chain use.
17872     if (UseIter.getOperandNo() != 0)
17873       return;
17874     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
17875       BaseIndexOffset Ptr;
17876       int64_t PtrDiff;
17877       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17878           !OverLimitInDependenceCheck(OtherStore, RootNode))
17879         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17880     }
17881   };
17882 
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load
  // (which is always true for nonvolatile ops). TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
17889   //
17890   // Root
17891   // |-------|-------|
17892   // Load    Load    Store3
17893   // |       |
17894   // Store1   Store2
17895   //
17896   // FIXME: We should be able to climb and
17897   // descend TokenFactors to find candidates as well.
17898 
17899   RootNode = St->getChain().getNode();
17900 
17901   unsigned NumNodesExplored = 0;
17902   const unsigned MaxSearchNodes = 1024;
17903   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17904     RootNode = Ldn->getChain().getNode();
17905     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17906          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17907       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17908         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17909           TryToAddCandidate(I2);
17910       }
17911       // Check stores that depend on the root (e.g. Store 3 in the chart above).
17912       if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
17913         TryToAddCandidate(I);
17914       }
17915     }
17916   } else {
17917     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17918          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17919       TryToAddCandidate(I);
17920   }
17921 }
17922 
17923 // We need to check that merging these stores does not cause a loop in the
17924 // DAG. Any store candidate may depend on another candidate indirectly through
17925 // its operands. Check in parallel by searching up from operands of candidates.
17926 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17927     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17928     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
17933 
17934   SmallPtrSet<const SDNode *, 32> Visited;
17935   SmallVector<const SDNode *, 8> Worklist;
17936 
17937   // RootNode is a predecessor to all candidates so we need not search
17938   // past it. Add RootNode (peeking through TokenFactors). Do not count
17939   // these towards size check.
17940 
17941   Worklist.push_back(RootNode);
17942   while (!Worklist.empty()) {
17943     auto N = Worklist.pop_back_val();
17944     if (!Visited.insert(N).second)
17945       continue; // Already present in Visited.
17946     if (N->getOpcode() == ISD::TokenFactor) {
17947       for (SDValue Op : N->ops())
17948         Worklist.push_back(Op.getNode());
17949     }
17950   }
17951 
17952   // Don't count pruning nodes towards max.
17953   unsigned int Max = 1024 + Visited.size();
17954   // Search Ops of store candidates.
17955   for (unsigned i = 0; i < NumStores; ++i) {
17956     SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                     in candidate selection, but only by following the
    //                     chain dependencies. We could still have a chain
    //                     dependency to a load, that has a non-chain dep to
    //                     another load, that depends on a store, etc. So it is
    //                     possible to have dependencies that consist of a mix
    //                     of chain and non-chain deps, and we need to include
    //                     chain operands in the analysis here.
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via an indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
17973     for (unsigned j = 0; j < N->getNumOperands(); ++j)
17974       Worklist.push_back(N->getOperand(j).getNode());
17975   }
17976   // Search through DAG. We can stop early if we find a store node.
17977   for (unsigned i = 0; i < NumStores; ++i)
17978     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17979                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair more than a limited
      // number of times, we won't add the StoreNode into the StoreNodes
      // set again.
17983       if (Visited.size() >= Max) {
17984         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17985         if (RootCount.first == RootNode)
17986           RootCount.second++;
17987         else
17988           RootCount = {RootNode, 1};
17989       }
17990       return false;
17991     }
17992   return true;
17993 }
17994 
17995 unsigned
17996 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17997                                   int64_t ElementSizeBytes) const {
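  // For example, with ElementSizeBytes == 4 and sorted OffsetFromBase values
  // {0, 4, 12}, the scan below returns 2: the store at offset 12 is not
  // consecutive with the store at offset 4 and ends the run.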
17998   while (true) {
17999     // Find a store past the width of the first store.
18000     size_t StartIdx = 0;
18001     while ((StartIdx + 1 < StoreNodes.size()) &&
18002            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
18003               StoreNodes[StartIdx + 1].OffsetFromBase)
18004       ++StartIdx;
18005 
18006     // Bail if we don't have enough candidates to merge.
18007     if (StartIdx + 1 >= StoreNodes.size())
18008       return 0;
18009 
18010     // Trim stores that overlapped with the first store.
18011     if (StartIdx)
18012       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
18013 
18014     // Scan the memory operations on the chain and find the first
18015     // non-consecutive store memory address.
18016     unsigned NumConsecutiveStores = 1;
18017     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
18018     // Check that the addresses are consecutive starting from the second
18019     // element in the list of stores.
18020     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
18021       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
18022       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18023         break;
18024       NumConsecutiveStores = i + 1;
18025     }
18026     if (NumConsecutiveStores > 1)
18027       return NumConsecutiveStores;
18028 
18029     // There are no consecutive stores at the start of the list.
18030     // Remove the first store and try again.
18031     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
18032   }
18033 }
18034 
18035 bool DAGCombiner::tryStoreMergeOfConstants(
18036     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18037     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
18038   LLVMContext &Context = *DAG.getContext();
18039   const DataLayout &DL = DAG.getDataLayout();
18040   int64_t ElementSizeBytes = MemVT.getStoreSize();
18041   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18042   bool MadeChange = false;
18043 
18044   // Store the constants into memory as one consecutive store.
18045   while (NumConsecutiveStores >= 2) {
18046     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18047     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18048     unsigned FirstStoreAlign = FirstInChain->getAlignment();
18049     unsigned LastLegalType = 1;
18050     unsigned LastLegalVectorType = 1;
18051     bool LastIntegerTrunc = false;
18052     bool NonZero = false;
18053     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
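    // For example, with stored values {1, 2, 0, 0} FirstZeroAfterNonZero
    // becomes 2; the skip logic below never prunes past that index, since a
    // retry starting at the zeros may merge them as a cheap all-zero store.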
18054     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18055       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
18056       SDValue StoredVal = ST->getValue();
18057       bool IsElementZero = false;
18058       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
18059         IsElementZero = C->isZero();
18060       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
18061         IsElementZero = C->getConstantFPValue()->isNullValue();
18062       if (IsElementZero) {
18063         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
18064           FirstZeroAfterNonZero = i;
18065       }
18066       NonZero |= !IsElementZero;
18067 
18068       // Find a legal type for the constant store.
18069       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18070       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18071       bool IsFast = false;
18072 
18073       // Break early when size is too large to be legal.
18074       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18075         break;
18076 
18077       if (TLI.isTypeLegal(StoreTy) &&
18078           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18079                                DAG.getMachineFunction()) &&
18080           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18081                                  *FirstInChain->getMemOperand(), &IsFast) &&
18082           IsFast) {
18083         LastIntegerTrunc = false;
18084         LastLegalType = i + 1;
18085         // Or check whether a truncstore is legal.
18086       } else if (TLI.getTypeAction(Context, StoreTy) ==
18087                  TargetLowering::TypePromoteInteger) {
18088         EVT LegalizedStoredValTy =
18089             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
18090         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18091             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18092                                  DAG.getMachineFunction()) &&
18093             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18094                                    *FirstInChain->getMemOperand(), &IsFast) &&
18095             IsFast) {
18096           LastIntegerTrunc = true;
18097           LastLegalType = i + 1;
18098         }
18099       }
18100 
      // We only use vectors if the constant is known to be zero or the
      // target allows it, and only if the function is not marked with the
      // noimplicitfloat attribute.
18104       if ((!NonZero ||
18105            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
18106           AllowVectors) {
18107         // Find a legal type for the vector store.
18108         unsigned Elts = (i + 1) * NumMemElts;
18109         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18110         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
18111             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18112             TLI.allowsMemoryAccess(Context, DL, Ty,
18113                                    *FirstInChain->getMemOperand(), &IsFast) &&
18114             IsFast)
18115           LastLegalVectorType = i + 1;
18116       }
18117     }
18118 
18119     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
18120     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
18121     bool UseTrunc = LastIntegerTrunc && !UseVector;
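    // For example, four consecutive one-byte constant stores on a target
    // where i32 is legal (and the merge is otherwise allowed) yield
    // LastLegalType == 4 and are merged into a single i32 store.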
18122 
18123     // Check if we found a legal integer type that creates a meaningful
18124     // merge.
18125     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
18133       unsigned NumSkip = 1;
18134       while ((NumSkip < NumConsecutiveStores) &&
18135              (NumSkip < FirstZeroAfterNonZero) &&
18136              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
18137         NumSkip++;
18138 
18139       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18140       NumConsecutiveStores -= NumSkip;
18141       continue;
18142     }
18143 
18144     // Check that we can merge these candidates without causing a cycle.
18145     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18146                                                   RootNode)) {
18147       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18148       NumConsecutiveStores -= NumElem;
18149       continue;
18150     }
18151 
18152     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
18153                                                   /*IsConstantSrc*/ true,
18154                                                   UseVector, UseTrunc);
18155 
18156     // Remove merged stores for next iteration.
18157     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18158     NumConsecutiveStores -= NumElem;
18159   }
18160   return MadeChange;
18161 }
18162 
18163 bool DAGCombiner::tryStoreMergeOfExtracts(
18164     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18165     EVT MemVT, SDNode *RootNode) {
18166   LLVMContext &Context = *DAG.getContext();
18167   const DataLayout &DL = DAG.getDataLayout();
18168   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18169   bool MadeChange = false;
18170 
  // Keep looping over the consecutive stores while merging succeeds.
18172   while (NumConsecutiveStores >= 2) {
18173     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18174     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18175     unsigned FirstStoreAlign = FirstInChain->getAlignment();
18176     unsigned NumStoresToMerge = 1;
18177     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18178       // Find a legal type for the vector store.
18179       unsigned Elts = (i + 1) * NumMemElts;
18180       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
18181       bool IsFast = false;
18182 
18183       // Break early when size is too large to be legal.
18184       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
18185         break;
18186 
18187       if (TLI.isTypeLegal(Ty) &&
18188           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18189           TLI.allowsMemoryAccess(Context, DL, Ty,
18190                                  *FirstInChain->getMemOperand(), &IsFast) &&
18191           IsFast)
18192         NumStoresToMerge = i + 1;
18193     }
18194 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
18197     if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
18204       unsigned NumSkip = 1;
18205       while ((NumSkip < NumConsecutiveStores) &&
18206              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
18207         NumSkip++;
18208 
18209       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18210       NumConsecutiveStores -= NumSkip;
18211       continue;
18212     }
18213 
18214     // Check that we can merge these candidates without causing a cycle.
18215     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
18216                                                   RootNode)) {
18217       StoreNodes.erase(StoreNodes.begin(),
18218                        StoreNodes.begin() + NumStoresToMerge);
18219       NumConsecutiveStores -= NumStoresToMerge;
18220       continue;
18221     }
18222 
18223     MadeChange |= mergeStoresOfConstantsOrVecElts(
18224         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
18225         /*UseVector*/ true, /*UseTrunc*/ false);
18226 
18227     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
18228     NumConsecutiveStores -= NumStoresToMerge;
18229   }
18230   return MadeChange;
18231 }
18232 
18233 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
18234                                        unsigned NumConsecutiveStores, EVT MemVT,
18235                                        SDNode *RootNode, bool AllowVectors,
18236                                        bool IsNonTemporalStore,
18237                                        bool IsNonTemporalLoad) {
18238   LLVMContext &Context = *DAG.getContext();
18239   const DataLayout &DL = DAG.getDataLayout();
18240   int64_t ElementSizeBytes = MemVT.getStoreSize();
18241   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18242   bool MadeChange = false;
18243 
18244   // Look for load nodes which are used by the stored values.
18245   SmallVector<MemOpLink, 8> LoadNodes;
18246 
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zero-extending, volatile, or indexed, and they must be
  // consecutive.
18249   BaseIndexOffset LdBasePtr;
18250 
18251   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18252     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18253     SDValue Val = peekThroughBitcasts(St->getValue());
18254     LoadSDNode *Ld = cast<LoadSDNode>(Val);
18255 
18256     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
18257     // If this is not the first ptr that we check.
18258     int64_t LdOffset = 0;
18259     if (LdBasePtr.getBase().getNode()) {
18260       // The base ptr must be the same.
18261       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
18262         break;
18263     } else {
      // Record the first base pointer; all later base pointers must match it.
18265       LdBasePtr = LdPtr;
18266     }
18267 
18268     // We found a potential memory operand to merge.
18269     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18270   }
18271 
18272   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18273     Align RequiredAlignment;
18274     bool NeedRotate = false;
18275     if (LoadNodes.size() == 2) {
18276       // If we have load/store pair instructions and we only have two values,
18277       // don't bother merging.
18278       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18279           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18280         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18281         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18282         break;
18283       }
18284       // If the loads are reversed, see if we can rotate the halves into place.
18285       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18286       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18287       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18288       if (Offset0 - Offset1 == ElementSizeBytes &&
18289           (hasOperation(ISD::ROTL, PairVT) ||
18290            hasOperation(ISD::ROTR, PairVT))) {
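        // For example, i32 loads at offsets {4, 0} feeding stores at
        // offsets {0, 4} become a single i64 load followed by a rotate
        // by 32 bits.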
18291         std::swap(LoadNodes[0], LoadNodes[1]);
18292         NeedRotate = true;
18293       }
18294     }
18295     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18296     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18297     Align FirstStoreAlign = FirstInChain->getAlign();
18298     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18299 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. This variable holds the index
    // into the load node array.
18303 
18304     unsigned LastConsecutiveLoad = 1;
18305 
    // These variables refer to sizes, not indices into the array.
18307     unsigned LastLegalVectorType = 1;
18308     unsigned LastLegalIntegerType = 1;
18309     bool isDereferenceable = true;
18310     bool DoIntegerTruncate = false;
18311     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18312     SDValue LoadChain = FirstLoad->getChain();
18313     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18314       // All loads must share the same chain.
18315       if (LoadNodes[i].MemNode->getChain() != LoadChain)
18316         break;
18317 
18318       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18319       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18320         break;
18321       LastConsecutiveLoad = i;
18322 
18323       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18324         isDereferenceable = false;
18325 
18326       // Find a legal type for the vector store.
18327       unsigned Elts = (i + 1) * NumMemElts;
18328       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18329 
18330       // Break early when size is too large to be legal.
18331       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18332         break;
18333 
18334       bool IsFastSt = false;
18335       bool IsFastLd = false;
18336       // Don't try vector types if we need a rotate. We may still fail the
18337       // legality checks for the integer type, but we can't handle the rotate
18338       // case with vectors.
18339       // FIXME: We could use a shuffle in place of the rotate.
18340       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18341           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18342                                DAG.getMachineFunction()) &&
18343           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18344                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18345           IsFastSt &&
18346           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18347                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18348           IsFastLd) {
18349         LastLegalVectorType = i + 1;
18350       }
18351 
18352       // Find a legal type for the integer store.
18353       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18354       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18355       if (TLI.isTypeLegal(StoreTy) &&
18356           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18357                                DAG.getMachineFunction()) &&
18358           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18359                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18360           IsFastSt &&
18361           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18362                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18363           IsFastLd) {
18364         LastLegalIntegerType = i + 1;
18365         DoIntegerTruncate = false;
18366         // Or check whether a truncstore and extload is legal.
18367       } else if (TLI.getTypeAction(Context, StoreTy) ==
18368                  TargetLowering::TypePromoteInteger) {
18369         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
18370         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18371             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18372                                  DAG.getMachineFunction()) &&
18373             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18374             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18375             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18376             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18377                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
18378             IsFastSt &&
18379             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18380                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
18381             IsFastLd) {
18382           LastLegalIntegerType = i + 1;
18383           DoIntegerTruncate = true;
18384         }
18385       }
18386     }
18387 
18388     // Only use vector types if the vector type is larger than the integer
18389     // type. If they are the same, use integers.
18390     bool UseVectorTy =
18391         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18392     unsigned LastLegalType =
18393         std::max(LastLegalVectorType, LastLegalIntegerType);
18394 
    // We add +1 here because the LastXXX variables refer to an array index
    // while NumElem refers to a count of elements.
18397     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18398     NumElem = std::min(LastLegalType, NumElem);
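    // For example, with 4 consecutive stores but only 3 consecutive loads
    // (LastConsecutiveLoad == 2) and LastLegalType == 2, NumElem becomes 2.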
18399     Align FirstLoadAlign = FirstLoad->getAlign();
18400 
18401     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment of either
      // the load or store has improved. Drop as many candidates as we
      // can here.
18409       unsigned NumSkip = 1;
18410       while ((NumSkip < LoadNodes.size()) &&
18411              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18412              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18413         NumSkip++;
18414       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18415       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18416       NumConsecutiveStores -= NumSkip;
18417       continue;
18418     }
18419 
18420     // Check that we can merge these candidates without causing a cycle.
18421     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18422                                                   RootNode)) {
18423       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18424       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18425       NumConsecutiveStores -= NumElem;
18426       continue;
18427     }
18428 
    // Determine whether it is better to use vectors or integers to load
    // and store to memory.
18431     EVT JointMemOpVT;
18432     if (UseVectorTy) {
18433       // Find a legal type for the vector store.
18434       unsigned Elts = NumElem * NumMemElts;
18435       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18436     } else {
18437       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18438       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18439     }
18440 
18441     SDLoc LoadDL(LoadNodes[0].MemNode);
18442     SDLoc StoreDL(StoreNodes[0].MemNode);
18443 
18444     // The merged loads are required to have the same incoming chain, so
18445     // using the first's chain is acceptable.
18446 
18447     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18448     AddToWorklist(NewStoreChain.getNode());
18449 
18450     MachineMemOperand::Flags LdMMOFlags =
18451         isDereferenceable ? MachineMemOperand::MODereferenceable
18452                           : MachineMemOperand::MONone;
18453     if (IsNonTemporalLoad)
18454       LdMMOFlags |= MachineMemOperand::MONonTemporal;
18455 
18456     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18457                                               ? MachineMemOperand::MONonTemporal
18458                                               : MachineMemOperand::MONone;
18459 
18460     SDValue NewLoad, NewStore;
18461     if (UseVectorTy || !DoIntegerTruncate) {
18462       NewLoad = DAG.getLoad(
18463           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18464           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18465       SDValue StoreOp = NewLoad;
18466       if (NeedRotate) {
18467         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18468         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18469                "Unexpected type for rotate-able load pair");
18470         SDValue RotAmt =
18471             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18472         // Target can convert to the identical ROTR if it does not have ROTL.
18473         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18474       }
18475       NewStore = DAG.getStore(
18476           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18477           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18478     } else { // This must be the truncstore/extload case
18479       EVT ExtendedTy =
18480           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18481       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18482                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
18483                                FirstLoad->getPointerInfo(), JointMemOpVT,
18484                                FirstLoadAlign, LdMMOFlags);
18485       NewStore = DAG.getTruncStore(
18486           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18487           FirstInChain->getPointerInfo(), JointMemOpVT,
18488           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18489     }
18490 
18491     // Transfer chain users from old loads to the new load.
18492     for (unsigned i = 0; i < NumElem; ++i) {
18493       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18494       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18495                                     SDValue(NewLoad.getNode(), 1));
18496     }
18497 
18498     // Replace all stores with the new store. Recursively remove corresponding
18499     // values if they are no longer used.
18500     for (unsigned i = 0; i < NumElem; ++i) {
18501       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18502       CombineTo(StoreNodes[i].MemNode, NewStore);
18503       if (Val->use_empty())
18504         recursivelyDeleteUnusedNodes(Val.getNode());
18505     }
18506 
18507     MadeChange = true;
18508     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18509     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18510     NumConsecutiveStores -= NumElem;
18511   }
18512   return MadeChange;
18513 }
18514 
18515 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18516   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18517     return false;
18518 
18519   // TODO: Extend this function to merge stores of scalable vectors.
18520   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18521   // store since we know <vscale x 16 x i8> is exactly twice as large as
18522   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18523   EVT MemVT = St->getMemoryVT();
18524   if (MemVT.isScalableVector())
18525     return false;
18526   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18527     return false;
18528 
18529   // This function cannot currently deal with non-byte-sized memory sizes.
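  // (For example, an i1 store, whose memory size is 1 bit but whose store
  // size is 1 byte, is rejected by the check below.)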
18530   int64_t ElementSizeBytes = MemVT.getStoreSize();
18531   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18532     return false;
18533 
18534   // Do not bother looking at stored values that are not constants, loads, or
18535   // extracted vector elements.
18536   SDValue StoredVal = peekThroughBitcasts(St->getValue());
18537   const StoreSource StoreSrc = getStoreSource(StoredVal);
18538   if (StoreSrc == StoreSource::Unknown)
18539     return false;
18540 
18541   SmallVector<MemOpLink, 8> StoreNodes;
18542   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
18544   getStoreMergeCandidates(St, StoreNodes, RootNode);
18545 
18546   // Check if there is anything to merge.
18547   if (StoreNodes.size() < 2)
18548     return false;
18549 
18550   // Sort the memory operands according to their distance from the
18551   // base pointer.
18552   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18553     return LHS.OffsetFromBase < RHS.OffsetFromBase;
18554   });
18555 
18556   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18557       Attribute::NoImplicitFloat);
18558   bool IsNonTemporalStore = St->isNonTemporal();
18559   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18560                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
18561 
  // Store Merge attempts to merge the lowest stores. This generally
  // works out well when it succeeds, as the remaining stores are checked
  // after the first collection of stores is merged. However, in the
18565   // case that a non-mergeable store is found first, e.g., {p[-2],
18566   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
18567   // mergeable cases. To prevent this, we prune such stores from the
18568   // front of StoreNodes here.
18569   bool MadeChange = false;
18570   while (StoreNodes.size() > 1) {
18571     unsigned NumConsecutiveStores =
18572         getConsecutiveStores(StoreNodes, ElementSizeBytes);
18573     // There are no more stores in the list to examine.
18574     if (NumConsecutiveStores == 0)
18575       return MadeChange;
18576 
18577     // We have at least 2 consecutive stores. Try to merge them.
18578     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18579     switch (StoreSrc) {
18580     case StoreSource::Constant:
18581       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18582                                              MemVT, RootNode, AllowVectors);
18583       break;
18584 
18585     case StoreSource::Extract:
18586       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18587                                             MemVT, RootNode);
18588       break;
18589 
18590     case StoreSource::Load:
18591       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18592                                          MemVT, RootNode, AllowVectors,
18593                                          IsNonTemporalStore, IsNonTemporalLoad);
18594       break;
18595 
18596     default:
18597       llvm_unreachable("Unhandled store source type");
18598     }
18599   }
18600   return MadeChange;
18601 }
18602 
18603 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18604   SDLoc SL(ST);
18605   SDValue ReplStore;
18606 
18607   // Replace the chain to avoid dependency.
18608   if (ST->isTruncatingStore()) {
18609     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18610                                   ST->getBasePtr(), ST->getMemoryVT(),
18611                                   ST->getMemOperand());
18612   } else {
18613     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18614                              ST->getMemOperand());
18615   }
18616 
18617   // Create token to keep both nodes around.
18618   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18619                               MVT::Other, ST->getChain(), ReplStore);
18620 
18621   // Make sure the new and old chains are cleaned up.
18622   AddToWorklist(Token.getNode());
18623 
18624   // Don't add users to work list.
18625   return CombineTo(ST, Token, false);
18626 }
18627 
18628 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18629   SDValue Value = ST->getValue();
18630   if (Value.getOpcode() == ISD::TargetConstantFP)
18631     return SDValue();
18632 
18633   if (!ISD::isNormalStore(ST))
18634     return SDValue();
18635 
18636   SDLoc DL(ST);
18637 
18638   SDValue Chain = ST->getChain();
18639   SDValue Ptr = ST->getBasePtr();
18640 
18641   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18642 
18643   // NOTE: If the original store is volatile, this transform must not increase
18644   // the number of stores.  For example, on x86-32 an f64 can be stored in one
18645   // processor operation but an i64 (which is not legal) requires two.  So the
18646   // transform should not be done in this case.
18647 
18648   SDValue Tmp;
18649   switch (CFP->getSimpleValueType(0).SimpleTy) {
18650   default:
18651     llvm_unreachable("Unknown FP type");
18652   case MVT::f16:    // We don't do this for these yet.
18653   case MVT::f80:
18654   case MVT::f128:
18655   case MVT::ppcf128:
18656     return SDValue();
18657   case MVT::f32:
18658     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18659         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18660       ;
18661       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18662                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18663                             MVT::i32);
18664       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18665     }
18666 
18667     return SDValue();
18668   case MVT::f64:
18669     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18670          ST->isSimple()) ||
18671         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18672       ;
18673       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18674                             getZExtValue(), SDLoc(CFP), MVT::i64);
18675       return DAG.getStore(Chain, DL, Tmp,
18676                           Ptr, ST->getMemOperand());
18677     }
18678 
18679     if (ST->isSimple() &&
18680         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18681       // Many FP stores are not made apparent until after legalize, e.g. for
18682       // argument passing.  Since this is so common, custom legalize the
18683       // 64-bit integer store into two 32-bit stores.
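      // For example, storing double 1.0 (bits 0x3FF0000000000000) becomes an
      // i32 store of 0x00000000 followed by an i32 store of 0x3FF00000 at
      // offset 4 (the halves are swapped on big-endian targets).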
18684       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18685       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18686       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18687       if (DAG.getDataLayout().isBigEndian())
18688         std::swap(Lo, Hi);
18689 
18690       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18691       AAMDNodes AAInfo = ST->getAAInfo();
18692 
18693       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18694                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18695       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18696       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18697                                  ST->getPointerInfo().getWithOffset(4),
18698                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18699       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18700                          St0, St1);
18701     }
18702 
18703     return SDValue();
18704   }
18705 }
18706 
18707 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18708   StoreSDNode *ST  = cast<StoreSDNode>(N);
18709   SDValue Chain = ST->getChain();
18710   SDValue Value = ST->getValue();
18711   SDValue Ptr   = ST->getBasePtr();
18712 
18713   // If this is a store of a bit convert, store the input value if the
18714   // resultant store does not need a higher alignment than the original.
18715   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18716       ST->isUnindexed()) {
18717     EVT SVT = Value.getOperand(0).getValueType();
18718     // If the store is volatile, we only want to change the store type if the
18719     // resulting store is legal. Otherwise we might increase the number of
18720     // memory accesses. We don't care if the original type was legal or not
18721     // as we assume software couldn't rely on the number of accesses of an
18722     // illegal type.
18723     // TODO: May be able to relax for unordered atomics (see D66309)
18724     if (((!LegalOperations && ST->isSimple()) ||
18725          TLI.isOperationLegal(ISD::STORE, SVT)) &&
18726         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18727                                      DAG, *ST->getMemOperand())) {
18728       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18729                           ST->getMemOperand());
18730     }
18731   }
18732 
18733   // Turn 'store undef, Ptr' -> nothing.
18734   if (Value.isUndef() && ST->isUnindexed())
18735     return Chain;
18736 
18737   // Try to infer better alignment information than the store already has.
18738   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18739     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18740       if (*Alignment > ST->getAlign() &&
18741           isAligned(*Alignment, ST->getSrcValueOffset())) {
18742         SDValue NewStore =
18743             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18744                               ST->getMemoryVT(), *Alignment,
18745                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
18746         // NewStore will always be N as we are only refining the alignment
18747         assert(NewStore.getNode() == N);
18748         (void)NewStore;
18749       }
18750     }
18751   }
18752 
  // Try transforming a pair of floating-point load / store ops to integer
  // load / store ops.
18755   if (SDValue NewST = TransformFPLoadStorePair(N))
18756     return NewST;
18757 
18758   // Try transforming several stores into STORE (BSWAP).
18759   if (SDValue Store = mergeTruncStores(ST))
18760     return Store;
18761 
18762   if (ST->isUnindexed()) {
18763     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18764     // adjacent stores.
18765     if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node so nothing else is done.
18768       return SDValue(ST, 0);
18769     }
18770     Chain = ST->getChain();
18771   }
18772 
18773   // FIXME: is there such a thing as a truncating indexed store?
18774   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18775       Value.getValueType().isInteger() &&
18776       (!isa<ConstantSDNode>(Value) ||
18777        !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Convert a truncating store of an extension into a standard store.
18779     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
18780          Value.getOpcode() == ISD::SIGN_EXTEND ||
18781          Value.getOpcode() == ISD::ANY_EXTEND) &&
18782         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
18783         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
18784       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18785                           ST->getMemOperand());
18786 
18787     APInt TruncDemandedBits =
18788         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18789                              ST->getMemoryVT().getScalarSizeInBits());
18790 
18791     // See if we can simplify the input to this truncstore with knowledge that
18792     // only the low bits are being used.  For example:
18793     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18794     AddToWorklist(Value.getNode());
18795     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18796       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18797                                ST->getMemOperand());
18798 
18799     // Otherwise, see if we can simplify the operation with
18800     // SimplifyDemandedBits, which only works if the value has a single use.
18801     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the Store node itself.
18806       if (N->getOpcode() != ISD::DELETED_NODE)
18807         AddToWorklist(N);
18808       return SDValue(N, 0);
18809     }
18810   }
18811 
18812   // If this is a load followed by a store to the same location, then the store
18813   // is dead/noop.
18814   // TODO: Can relax for unordered atomics (see D66309)
18815   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
18816     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
18817         ST->isUnindexed() && ST->isSimple() &&
18818         Ld->getAddressSpace() == ST->getAddressSpace() &&
18819         // There can't be any side effects between the load and store, such as
18820         // a call or store.
18821         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
18822       // The store is dead, remove it.
18823       return Chain;
18824     }
18825   }
18826 
18827   // TODO: Can relax for unordered atomics (see D66309)
18828   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
18829     if (ST->isUnindexed() && ST->isSimple() &&
18830         ST1->isUnindexed() && ST1->isSimple()) {
18831       if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
18832           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
18833           ST->getAddressSpace() == ST1->getAddressSpace()) {
18834         // If this is a store followed by a store with the same value to the
18835         // same location, then the store is dead/noop.
18836         return Chain;
18837       }
18838 
18839       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
18840           !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below require knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
18843           !ST->getMemoryVT().isScalableVector() &&
18844           !ST1->getMemoryVT().isScalableVector() &&
18845           ST->getAddressSpace() == ST1->getAddressSpace()) {
18846         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
18847         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
18848         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
18849         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store (ST1) writes to a subset of the current
        // store's location and no other node is chained to it, the preceding
        // store is dead and we can effectively drop it. Do not remove stores
        // to undef as they may be used as data sinks.
18854         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
18855           CombineTo(ST1, ST1->getChain());
18856           return SDValue();
18857         }
18858       }
18859     }
18860   }
18861 
18862   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
18863   // truncating store.  We can do this even if this is already a truncstore.
18864   if ((Value.getOpcode() == ISD::FP_ROUND ||
18865        Value.getOpcode() == ISD::TRUNCATE) &&
18866       Value->hasOneUse() && ST->isUnindexed() &&
18867       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18868                                ST->getMemoryVT(), LegalOperations)) {
18869     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
18870                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
18871   }
18872 
18873   // Always perform this optimization before types are legal. If the target
18874   // prefers, also try this after legalization to catch stores that were created
18875   // by intrinsics or other nodes.
18876   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
18877     while (true) {
18878       // There can be multiple store sequences on the same chain.
18879       // Keep trying to merge store sequences until we are unable to do so
18880       // or until we merge the last store on the chain.
18881       bool Changed = mergeConsecutiveStores(ST);
18882       if (!Changed) break;
      // Return N, as the merge only uses CombineTo and no worklist
      // cleanup is necessary.
18885       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
18886         return SDValue(N, 0);
18887     }
18888   }
18889 
18890   // Try transforming N to an indexed store.
18891   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18892     return SDValue(N, 0);
18893 
  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
18895   //
18896   // Make sure to do this only after attempting to merge stores in order to
18897   //  avoid changing the types of some subset of stores due to visit order,
18898   //  preventing their merging.
18899   if (isa<ConstantFPSDNode>(ST->getValue())) {
18900     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
18901       return NewSt;
18902   }
18903 
18904   if (SDValue NewSt = splitMergedValStore(ST))
18905     return NewSt;
18906 
18907   return ReduceLoadOpStoreWidth(N);
18908 }
18909 
18910 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18911   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18912   if (!LifetimeEnd->hasOffset())
18913     return SDValue();
18914 
18915   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18916                                         LifetimeEnd->getOffset(), false);
18917 
18918   // We walk up the chains to find stores.
18919   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18920   while (!Chains.empty()) {
18921     SDValue Chain = Chains.pop_back_val();
18922     if (!Chain.hasOneUse())
18923       continue;
18924     switch (Chain.getOpcode()) {
18925     case ISD::TokenFactor:
18926       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18927         Chains.push_back(Chain.getOperand(--Nops));
18928       break;
18929     case ISD::LIFETIME_START:
18930     case ISD::LIFETIME_END:
18931       // We can forward past any lifetime start/end that can be proven not to
18932       // alias the node.
18933       if (!mayAlias(Chain.getNode(), N))
18934         Chains.push_back(Chain.getOperand(0));
18935       break;
18936     case ISD::STORE: {
18937       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
18938       // TODO: Can relax for unordered atomics (see D66309)
18939       if (!ST->isSimple() || ST->isIndexed())
18940         continue;
18941       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18942       // The bounds of a scalable store are not known until runtime, so this
18943       // store cannot be elided.
18944       if (StoreSize.isScalable())
18945         continue;
18946       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18947       // If we store purely within object bounds just before its lifetime ends,
18948       // we can remove the store.
18949       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18950                                    StoreSize.getFixedSize() * 8)) {
18951         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18952                    dbgs() << "\nwithin LIFETIME_END of : ";
18953                    LifetimeEndBase.dump(); dbgs() << "\n");
18954         CombineTo(ST, ST->getChain());
18955         return SDValue(N, 0);
18956       }
18957     }
18958     }
18959   }
18960   return SDValue();
18961 }
18962 
/// For the store instruction sequence below, the F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
18966 /// which can remove the bitwise instructions or sink them to colder places.
18967 ///
18968 ///   (store (or (zext (bitcast F to i32) to i64),
18969 ///              (shl (zext I to i64), 32)), addr)  -->
18970 ///   (store F, addr) and (store I, addr+4)
18971 ///
/// Similarly, splitting other merged stores can also be beneficial, like:
18973 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18974 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18975 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18976 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18977 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18978 ///
18979 /// We allow each target to determine specifically which kind of splitting is
18980 /// supported.
18981 ///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo.
18984 ///   void goo(const std::pair<int, float> &);
18985 ///   hoo() {
18986 ///     ...
18987 ///     goo(std::make_pair(tmp, ftmp));
18988 ///     ...
18989 ///   }
18990 ///
18991 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18992   if (OptLevel == CodeGenOpt::None)
18993     return SDValue();
18994 
18995   // Can't change the number of memory accesses for a volatile store or break
18996   // atomicity for an atomic one.
18997   if (!ST->isSimple())
18998     return SDValue();
18999 
19000   SDValue Val = ST->getValue();
19001   SDLoc DL(ST);
19002 
19003   // Match OR operand.
19004   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
19005     return SDValue();
19006 
19007   // Match SHL operand and get Lower and Higher parts of Val.
19008   SDValue Op1 = Val.getOperand(0);
19009   SDValue Op2 = Val.getOperand(1);
19010   SDValue Lo, Hi;
19011   if (Op1.getOpcode() != ISD::SHL) {
19012     std::swap(Op1, Op2);
19013     if (Op1.getOpcode() != ISD::SHL)
19014       return SDValue();
19015   }
19016   Lo = Op2;
19017   Hi = Op1.getOperand(0);
19018   if (!Op1.hasOneUse())
19019     return SDValue();
19020 
19021   // Match shift amount to HalfValBitSize.
19022   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
19023   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
19024   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
19025     return SDValue();
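  // At this point Val matches (or (shl Hi, HalfValBitSize), Lo); e.g. for an
  // i64 store, Hi occupies the high 32 bits and Lo the low 32 bits.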
19026 
  // Lo and Hi must be zero-extended from an integer type whose size is
  // less than or equal to HalfValBitSize.
19029   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
19030       !Lo.getOperand(0).getValueType().isScalarInteger() ||
19031       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
19032       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
19033       !Hi.getOperand(0).getValueType().isScalarInteger() ||
19034       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
19035     return SDValue();
19036 
  // Use the EVT of the low and high parts before bitcast as the input
  // to the target query.
19039   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
19040                   ? Lo.getOperand(0).getValueType()
19041                   : Lo.getValueType();
19042   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
19043                    ? Hi.getOperand(0).getValueType()
19044                    : Hi.getValueType();
19045   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
19046     return SDValue();
19047 
19048   // Start to split store.
19049   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
19050   AAMDNodes AAInfo = ST->getAAInfo();
19051 
19052   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
19053   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
19054   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
19055   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
19056 
19057   SDValue Chain = ST->getChain();
19058   SDValue Ptr = ST->getBasePtr();
19059   // Lower value store.
19060   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
19061                              ST->getOriginalAlign(), MMOFlags, AAInfo);
19062   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
19063   // Higher value store.
19064   SDValue St1 = DAG.getStore(
19065       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
19066       ST->getOriginalAlign(), MMOFlags, AAInfo);
19067   return St1;
19068 }
19069 
19070 /// Convert a disguised subvector insertion into a shuffle:
19071 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
19074   SDValue InsertVal = N->getOperand(1);
19075   SDValue Vec = N->getOperand(0);
19076 
19077   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
19078   // InsIndex)
19079   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
19080   //   CONCAT_VECTORS.
19081   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
19082       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19083       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
19084     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
19085     ArrayRef<int> Mask = SVN->getMask();
19086 
19087     SDValue X = Vec.getOperand(0);
19088     SDValue Y = Vec.getOperand(1);
19089 
19090     // Vec's operand 0 is using indices from 0 to N-1 and
19091     // operand 1 from N to 2N - 1, where N is the number of
19092     // elements in the vectors.
19093     SDValue InsertVal0 = InsertVal.getOperand(0);
19094     int ElementOffset = -1;
19095 
19096     // We explore the inputs of the shuffle in order to see if we find the
19097     // source of the extract_vector_elt. If so, we can use it to modify the
19098     // shuffle rather than perform an insert_vector_elt.
19099     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
19100     ArgWorkList.emplace_back(Mask.size(), Y);
19101     ArgWorkList.emplace_back(0, X);
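    // For example, with two v4i32 shuffle inputs, X is visited with element
    // offset 0 and Y with element offset 4, matching the mask numbering
    // described above.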
19102 
19103     while (!ArgWorkList.empty()) {
19104       int ArgOffset;
19105       SDValue ArgVal;
19106       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
19107 
19108       if (ArgVal == InsertVal0) {
19109         ElementOffset = ArgOffset;
19110         break;
19111       }
19112 
      // Peek through concat_vectors.
19114       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
19115         int CurrentArgOffset =
19116             ArgOffset + ArgVal.getValueType().getVectorNumElements();
19117         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
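        // For example, peeking through (concat_vectors A:v2i32, B:v2i32)
        // seen at offset 8 queues B at offset 10 and A at offset 8.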
19118         for (SDValue Op : reverse(ArgVal->ops())) {
19119           CurrentArgOffset -= Step;
19120           ArgWorkList.emplace_back(CurrentArgOffset, Op);
19121         }
19122 
19123         // Make sure we went through all the elements and did not screw up index
19124         // computation.
19125         assert(CurrentArgOffset == ArgOffset);
19126       }
19127     }
19128 
19129     if (ElementOffset != -1) {
19130       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
19131 
19132       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
19133       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
19134       assert(NewMask[InsIndex] <
19135                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
19136              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
19137 
19138       SDValue LegalShuffle =
19139               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
19140                                           Y, NewMask, DAG);
19141       if (LegalShuffle)
19142         return LegalShuffle;
19143     }
19144   }
19145 
19146   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
19147   // bitcast(shuffle (bitcast V), (extended X), Mask)
19148   // Note: We do not use an insert_subvector node because that requires a
19149   // legal subvector type.
19150   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
19151       !InsertVal.getOperand(0).getValueType().isVector())
19152     return SDValue();
19153 
19154   SDValue SubVec = InsertVal.getOperand(0);
19155   SDValue DestVec = N->getOperand(0);
19156   EVT SubVecVT = SubVec.getValueType();
19157   EVT VT = DestVec.getValueType();
19158   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating a
  // vector from it is likely to exceed the cost of an insert_vector_elt.
19161   if (NumSrcElts == 1)
19162     return SDValue();
19163   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
19164   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
19165 
19166   // Step 1: Create a shuffle mask that implements this insert operation. The
19167   // vector that we are inserting into will be operand 0 of the shuffle, so
19168   // those elements are just 'i'. The inserted subvector is in the first
19169   // positions of operand 1 of the shuffle. Example:
19170   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
19171   SmallVector<int, 16> Mask(NumMaskVals);
19172   for (unsigned i = 0; i != NumMaskVals; ++i) {
19173     if (i / NumSrcElts == InsIndex)
19174       Mask[i] = (i % NumSrcElts) + NumMaskVals;
19175     else
19176       Mask[i] = i;
19177   }
19178 
19179   // Bail out if the target can not handle the shuffle we want to create.
19180   EVT SubVecEltVT = SubVecVT.getVectorElementType();
19181   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
19182   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
19183     return SDValue();
19184 
19185   // Step 2: Create a wide vector from the inserted source vector by appending
19186   // undefined elements. This is the same size as our destination vector.
19187   SDLoc DL(N);
19188   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
19189   ConcatOps[0] = SubVec;
19190   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
19191 
19192   // Step 3: Shuffle in the padded subvector.
19193   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
19194   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
19195   AddToWorklist(PaddedSubV.getNode());
19196   AddToWorklist(DestVecBC.getNode());
19197   AddToWorklist(Shuf.getNode());
19198   return DAG.getBitcast(VT, Shuf);
19199 }
19200 
19201 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
19202   SDValue InVec = N->getOperand(0);
19203   SDValue InVal = N->getOperand(1);
19204   SDValue EltNo = N->getOperand(2);
19205   SDLoc DL(N);
19206 
19207   EVT VT = InVec.getValueType();
19208   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19209 
  // Inserting into an out-of-bounds element is undefined.
19211   if (IndexC && VT.isFixedLengthVector() &&
19212       IndexC->getZExtValue() >= VT.getVectorNumElements())
19213     return DAG.getUNDEF(VT);
19214 
19215   // Remove redundant insertions:
19216   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
19217   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19218       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
19219     return InVec;
19220 
19221   if (!IndexC) {
    // If this is a variable insert into an undef vector, it might be better to
    // splat: inselt undef, InVal, EltNo --> build_vector <InVal, InVal, ...>
19224     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
19225       if (VT.isScalableVector())
19226         return DAG.getSplatVector(VT, DL, InVal);
19227 
19228       SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
19229       return DAG.getBuildVector(VT, DL, Ops);
19230     }
19231     return SDValue();
19232   }
19233 
19234   if (VT.isScalableVector())
19235     return SDValue();
19236 
19237   unsigned NumElts = VT.getVectorNumElements();
19238 
19239   // We must know which element is being inserted for folds below here.
19240   unsigned Elt = IndexC->getZExtValue();
19241   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
19242     return Shuf;
19243 
  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, V0, Idx0), V1, Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, V1, Idx1), V0, Idx0)
  //
  // Do this only if the child insert_vector_elt node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
19251   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19252       && isa<ConstantSDNode>(InVec.getOperand(2))) {
19253     unsigned OtherElt = InVec.getConstantOperandVal(2);
19254     if (Elt < OtherElt) {
19255       // Swap nodes.
19256       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19257                                   InVec.getOperand(0), InVal, EltNo);
19258       AddToWorklist(NewOp.getNode());
19259       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19260                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19261     }
19262   }
19263 
19264   // If we can't generate a legal BUILD_VECTOR, exit
19265   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
19266     return SDValue();
19267 
19268   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19269   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
19270   // vector elements.
19271   SmallVector<SDValue, 8> Ops;
19272   // Do not combine these two vectors if the output vector will not replace
19273   // the input vector.
19274   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19275     Ops.append(InVec->op_begin(), InVec->op_end());
19276   } else if (InVec.isUndef()) {
19277     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19278   } else {
19279     return SDValue();
19280   }
19281   assert(Ops.size() == NumElts && "Unexpected vector size");
19282 
19283   // Insert the element
19284   if (Elt < Ops.size()) {
19285     // All the operands of BUILD_VECTOR must have the same type;
19286     // we enforce that here.
19287     EVT OpVT = Ops[0].getValueType();
19288     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19289   }
19290 
19291   // Return the new vector
19292   return DAG.getBuildVector(VT, DL, Ops);
19293 }
19294 
19295 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19296                                                   SDValue EltNo,
19297                                                   LoadSDNode *OriginalLoad) {
19298   assert(OriginalLoad->isSimple());
19299 
19300   EVT ResultVT = EVE->getValueType(0);
19301   EVT VecEltVT = InVecVT.getVectorElementType();
19302 
  // If the vector element type is not a whole number of bytes then we are
  // unable to correctly compute an address to load only the extracted element
  // as a scalar.
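  // E.g. (illustrative): an element of v8i1 occupies a single bit, so element
  // 3 of such a vector has no byte address and cannot be loaded as a scalar.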
19306   if (!VecEltVT.isByteSized())
19307     return SDValue();
19308 
19309   ISD::LoadExtType ExtTy =
19310       ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
19311   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19312       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19313     return SDValue();
19314 
19315   Align Alignment = OriginalLoad->getAlign();
19316   MachinePointerInfo MPI;
19317   SDLoc DL(EVE);
19318   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
19319     int Elt = ConstEltNo->getZExtValue();
19320     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19321     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19322     Alignment = commonAlignment(Alignment, PtrOff);
19323   } else {
19324     // Discard the pointer info except the address space because the memory
19325     // operand can't represent this new access since the offset is variable.
19326     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19327     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19328   }
19329 
19330   bool IsFast = false;
19331   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19332                               OriginalLoad->getAddressSpace(), Alignment,
19333                               OriginalLoad->getMemOperand()->getFlags(),
19334                               &IsFast) ||
19335       !IsFast)
19336     return SDValue();
19337 
19338   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19339                                                InVecVT, EltNo);
19340 
19341   // We are replacing a vector load with a scalar load. The new load must have
19342   // identical memory op ordering to the original.
19343   SDValue Load;
19344   if (ResultVT.bitsGT(VecEltVT)) {
19345     // If the result type of vextract is wider than the load, then issue an
19346     // extending load instead.
19347     ISD::LoadExtType ExtType =
19348         TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
19349                                                               : ISD::EXTLOAD;
19350     Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
19351                           NewPtr, MPI, VecEltVT, Alignment,
19352                           OriginalLoad->getMemOperand()->getFlags(),
19353                           OriginalLoad->getAAInfo());
19354     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19355   } else {
19356     // The result type is narrower or the same width as the vector element
19357     Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
19358                        Alignment, OriginalLoad->getMemOperand()->getFlags(),
19359                        OriginalLoad->getAAInfo());
19360     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19361     if (ResultVT.bitsLT(VecEltVT))
19362       Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
19363     else
19364       Load = DAG.getBitcast(ResultVT, Load);
19365   }
19366   ++OpsNarrowed;
19367   return Load;
19368 }
19369 
19370 /// Transform a vector binary operation into a scalar binary operation by moving
19371 /// the math/logic after an extract element of a vector.
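/// For example (illustrative):
///   extractelt (add X, <1,2,3,4>), 1 --> add (extractelt X, 1), 2
/// where the vector-constant operand folds to its extracted lane.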
19372 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19373                                        bool LegalOperations) {
19374   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19375   SDValue Vec = ExtElt->getOperand(0);
19376   SDValue Index = ExtElt->getOperand(1);
19377   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19378   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19379       Vec->getNumValues() != 1)
19380     return SDValue();
19381 
19382   // Targets may want to avoid this to prevent an expensive register transfer.
19383   if (!TLI.shouldScalarizeBinop(Vec))
19384     return SDValue();
19385 
19386   // Extracting an element of a vector constant is constant-folded, so this
19387   // transform is just replacing a vector op with a scalar op while moving the
19388   // extract.
19389   SDValue Op0 = Vec.getOperand(0);
19390   SDValue Op1 = Vec.getOperand(1);
19391   if (isAnyConstantBuildVector(Op0, true) ||
19392       isAnyConstantBuildVector(Op1, true)) {
19393     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19394     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19395     SDLoc DL(ExtElt);
19396     EVT VT = ExtElt->getValueType(0);
19397     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19398     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19399     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19400   }
19401 
19402   return SDValue();
19403 }
19404 
19405 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19406   SDValue VecOp = N->getOperand(0);
19407   SDValue Index = N->getOperand(1);
19408   EVT ScalarVT = N->getValueType(0);
19409   EVT VecVT = VecOp.getValueType();
19410   if (VecOp.isUndef())
19411     return DAG.getUNDEF(ScalarVT);
19412 
19413   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19414   //
19415   // This only really matters if the index is non-constant since other combines
19416   // on the constant elements already work.
19417   SDLoc DL(N);
19418   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19419       Index == VecOp.getOperand(2)) {
19420     SDValue Elt = VecOp.getOperand(1);
19421     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19422   }
19423 
  // (vextract (scalar_to_vector val), 0) -> val
19425   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19426     // Only 0'th element of SCALAR_TO_VECTOR is defined.
19427     if (DAG.isKnownNeverZero(Index))
19428       return DAG.getUNDEF(ScalarVT);
19429 
19430     // Check if the result type doesn't match the inserted element type. A
19431     // SCALAR_TO_VECTOR may truncate the inserted element and the
19432     // EXTRACT_VECTOR_ELT may widen the extracted vector.
19433     SDValue InOp = VecOp.getOperand(0);
19434     if (InOp.getValueType() != ScalarVT) {
19435       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19436       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19437     }
19438     return InOp;
19439   }
19440 
19441   // extract_vector_elt of out-of-bounds element -> UNDEF
19442   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19443   if (IndexC && VecVT.isFixedLengthVector() &&
19444       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19445     return DAG.getUNDEF(ScalarVT);
19446 
19447   // extract_vector_elt (build_vector x, y), 1 -> y
19448   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19449        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19450       TLI.isTypeLegal(VecVT) &&
19451       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19452     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19453             VecVT.isFixedLengthVector()) &&
19454            "BUILD_VECTOR used for scalable vectors");
19455     unsigned IndexVal =
19456         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19457     SDValue Elt = VecOp.getOperand(IndexVal);
19458     EVT InEltVT = Elt.getValueType();
19459 
19460     // Sometimes build_vector's scalar input types do not match result type.
19461     if (ScalarVT == InEltVT)
19462       return Elt;
19463 
    // TODO: It may be useful to truncate (if the truncation is free) when the
    // build_vector implicitly converts.
19466   }
19467 
19468   if (VecVT.isScalableVector())
19469     return SDValue();
19470 
19471   // All the code from this point onwards assumes fixed width vectors, but it's
19472   // possible that some of the combinations could be made to work for scalable
19473   // vectors too.
19474   unsigned NumElts = VecVT.getVectorNumElements();
19475   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19476 
19477   // TODO: These transforms should not require the 'hasOneUse' restriction, but
19478   // there are regressions on multiple targets without it. We can end up with a
19479   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19480   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19481       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
19483     bool IsLE = DAG.getDataLayout().isLittleEndian();
19484     unsigned ExtractIndex = IndexC->getZExtValue();
19485     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19486     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19487     SDValue BCSrc = VecOp.getOperand(0);
19488     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19489       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19490 
19491     if (LegalTypes && BCSrc.getValueType().isInteger() &&
19492         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19493       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19494       // trunc i64 X to i32
19495       SDValue X = BCSrc.getOperand(0);
19496       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19497              "Extract element and scalar to vector can't change element type "
19498              "from FP to integer.");
19499       unsigned XBitWidth = X.getValueSizeInBits();
19500       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19501 
19502       // An extract element return value type can be wider than its vector
19503       // operand element type. In that case, the high bits are undefined, so
19504       // it's possible that we may need to extend rather than truncate.
19505       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19506         assert(XBitWidth % VecEltBitWidth == 0 &&
19507                "Scalar bitwidth must be a multiple of vector element bitwidth");
19508         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19509       }
19510     }
19511   }
19512 
19513   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19514     return BO;
19515 
  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example, on AVX, extracting an element from a wide vector
  // without using extract_subvector is not supported. However, if we can find
  // an underlying scalar value, then we can always use that.
19522   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19523     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19524     // Find the new index to extract from.
19525     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19526 
19527     // Extracting an undef index is undef.
19528     if (OrigElt == -1)
19529       return DAG.getUNDEF(ScalarVT);
19530 
19531     // Select the right vector half to extract from.
19532     SDValue SVInVec;
19533     if (OrigElt < (int)NumElts) {
19534       SVInVec = VecOp.getOperand(0);
19535     } else {
19536       SVInVec = VecOp.getOperand(1);
19537       OrigElt -= NumElts;
19538     }
19539 
19540     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19541       SDValue InOp = SVInVec.getOperand(OrigElt);
19542       if (InOp.getValueType() != ScalarVT) {
19543         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19544         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19545       }
19546 
19547       return InOp;
19548     }
19549 
19550     // FIXME: We should handle recursing on other vector shuffles and
19551     // scalar_to_vector here as well.
19552 
19553     if (!LegalOperations ||
19554         // FIXME: Should really be just isOperationLegalOrCustom.
19555         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19556         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19557       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19558                          DAG.getVectorIdxConstant(OrigElt, DL));
19559     }
19560   }
19561 
19562   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19563   // simplify it based on the (valid) extraction indices.
19564   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19565         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19566                Use->getOperand(0) == VecOp &&
19567                isa<ConstantSDNode>(Use->getOperand(1));
19568       })) {
19569     APInt DemandedElts = APInt::getZero(NumElts);
19570     for (SDNode *Use : VecOp->uses()) {
19571       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19572       if (CstElt->getAPIntValue().ult(NumElts))
19573         DemandedElts.setBit(CstElt->getZExtValue());
19574     }
19575     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19576       // We simplified the vector operand of this extract element. If this
19577       // extract is not dead, visit it again so it is folded properly.
19578       if (N->getOpcode() != ISD::DELETED_NODE)
19579         AddToWorklist(N);
19580       return SDValue(N, 0);
19581     }
19582     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19583     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19584       // We simplified the vector operand of this extract element. If this
19585       // extract is not dead, visit it again so it is folded properly.
19586       if (N->getOpcode() != ISD::DELETED_NODE)
19587         AddToWorklist(N);
19588       return SDValue(N, 0);
19589     }
19590   }
19591 
19592   // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
19595   bool BCNumEltsChanged = false;
19596   EVT ExtVT = VecVT.getVectorElementType();
19597   EVT LVT = ExtVT;
19598   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19599     return SDValue();
19600 
19601   if (VecOp.getOpcode() == ISD::BITCAST) {
19602     // Don't duplicate a load with other uses.
19603     if (!VecOp.hasOneUse())
19604       return SDValue();
19605 
19606     EVT BCVT = VecOp.getOperand(0).getValueType();
19607     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19608       return SDValue();
19609     if (NumElts != BCVT.getVectorNumElements())
19610       BCNumEltsChanged = true;
19611     VecOp = VecOp.getOperand(0);
19612     ExtVT = BCVT.getVectorElementType();
19613   }
19614 
19615   // extract (vector load $addr), i --> load $addr + i * size
19616   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19617       ISD::isNormalLoad(VecOp.getNode()) &&
19618       !Index->hasPredecessor(VecOp.getNode())) {
19619     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19620     if (VecLoad && VecLoad->isSimple())
19621       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19622   }
19623 
19624   // Perform only after legalization to ensure build_vector / vector_shuffle
19625   // optimizations have already been done.
19626   if (!LegalOperations || !IndexC)
19627     return SDValue();
19628 
19629   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19630   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19631   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19632   int Elt = IndexC->getZExtValue();
19633   LoadSDNode *LN0 = nullptr;
19634   if (ISD::isNormalLoad(VecOp.getNode())) {
19635     LN0 = cast<LoadSDNode>(VecOp);
19636   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19637              VecOp.getOperand(0).getValueType() == ExtVT &&
19638              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19639     // Don't duplicate a load with other uses.
19640     if (!VecOp.hasOneUse())
19641       return SDValue();
19642 
19643     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19644   }
19645   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19646     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19647     // =>
19648     // (load $addr+1*size)
19649 
19650     // Don't duplicate a load with other uses.
19651     if (!VecOp.hasOneUse())
19652       return SDValue();
19653 
19654     // If the bit convert changed the number of elements, it is unsafe
19655     // to examine the mask.
19656     if (BCNumEltsChanged)
19657       return SDValue();
19658 
    // Select the input vector, guarding against an out-of-range extract index.
19660     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19661     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19662 
19663     if (VecOp.getOpcode() == ISD::BITCAST) {
19664       // Don't duplicate a load with other uses.
19665       if (!VecOp.hasOneUse())
19666         return SDValue();
19667 
19668       VecOp = VecOp.getOperand(0);
19669     }
19670     if (ISD::isNormalLoad(VecOp.getNode())) {
19671       LN0 = cast<LoadSDNode>(VecOp);
19672       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19673       Index = DAG.getConstant(Elt, DL, Index.getValueType());
19674     }
19675   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19676              VecVT.getVectorElementType() == ScalarVT &&
19677              (!LegalTypes ||
19678               TLI.isTypeLegal(
19679                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19680     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19681     //      -> extract_vector_elt a, 0
19682     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19683     //      -> extract_vector_elt a, 1
19684     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19685     //      -> extract_vector_elt b, 0
19686     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19687     //      -> extract_vector_elt b, 1
19688     SDLoc SL(N);
19689     EVT ConcatVT = VecOp.getOperand(0).getValueType();
19690     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19691     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19692                                      Index.getValueType());
19693 
19694     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19695     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19696                               ConcatVT.getVectorElementType(),
19697                               ConcatOp, NewIdx);
19698     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19699   }
19700 
  // Make sure we found a simple (non-volatile, non-atomic) load and that the
  // extractelement is its only use.
19703   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
19704     return SDValue();
19705 
19706   // If Idx was -1 above, Elt is going to be -1, so just return undef.
19707   if (Elt == -1)
19708     return DAG.getUNDEF(LVT);
19709 
19710   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19711 }
19712 
19713 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
19714 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization earlier may create bit-casts which
  // will be type-legalized into complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
19721   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19722     return SDValue();
19723 
19724   unsigned NumInScalars = N->getNumOperands();
19725   SDLoc DL(N);
19726   EVT VT = N->getValueType(0);
19727 
19728   // Check to see if this is a BUILD_VECTOR of a bunch of values
19729   // which come from any_extend or zero_extend nodes. If so, we can create
19730   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19731   // optimizations. We do not handle sign-extend because we can't fill the sign
19732   // using shuffles.
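  //
  // For example, on a little-endian target (illustrative):
  //   (v4i16 build_vector (zext i8:a), (zext i8:b), (zext i8:c), (zext i8:d))
  //   --> (v4i16 bitcast (v8i8 build_vector a, 0, b, 0, c, 0, d, 0))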
19733   EVT SourceType = MVT::Other;
19734   bool AllAnyExt = true;
19735 
19736   for (unsigned i = 0; i != NumInScalars; ++i) {
19737     SDValue In = N->getOperand(i);
19738     // Ignore undef inputs.
19739     if (In.isUndef()) continue;
19740 
19741     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
19742     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
19743 
19744     // Abort if the element is not an extension.
19745     if (!ZeroExt && !AnyExt) {
19746       SourceType = MVT::Other;
19747       break;
19748     }
19749 
19750     // The input is a ZeroExt or AnyExt. Check the original type.
19751     EVT InTy = In.getOperand(0).getValueType();
19752 
19753     // Check that all of the widened source types are the same.
19754     if (SourceType == MVT::Other)
19755       // First time.
19756       SourceType = InTy;
19757     else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
19759       SourceType = MVT::Other;
19760       break;
19761     }
19762 
19763     // Check if all of the extends are ANY_EXTENDs.
19764     AllAnyExt &= AnyExt;
19765   }
19766 
19767   // In order to have valid types, all of the inputs must be extended from the
19768   // same source type and all of the inputs must be any or zero extend.
19769   // Scalar sizes must be a power of two.
19770   EVT OutScalarTy = VT.getScalarType();
19771   bool ValidTypes = SourceType != MVT::Other &&
19772                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
19773                  isPowerOf2_32(SourceType.getSizeInBits());
19774 
19775   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
19776   // turn into a single shuffle instruction.
19777   if (!ValidTypes)
19778     return SDValue();
19779 
19780   // If we already have a splat buildvector, then don't fold it if it means
19781   // introducing zeros.
19782   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
19783     return SDValue();
19784 
19785   bool isLE = DAG.getDataLayout().isLittleEndian();
19786   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
19787   assert(ElemRatio > 1 && "Invalid element size ratio");
19788   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
19789                                DAG.getConstant(0, DL, SourceType);
19790 
19791   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
19792   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
19793 
19794   // Populate the new build_vector
19795   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19796     SDValue Cast = N->getOperand(i);
19797     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
19798             Cast.getOpcode() == ISD::ZERO_EXTEND ||
19799             Cast.isUndef()) && "Invalid cast opcode");
19800     SDValue In;
19801     if (Cast.isUndef())
19802       In = DAG.getUNDEF(SourceType);
19803     else
19804       In = Cast->getOperand(0);
19805     unsigned Index = isLE ? (i * ElemRatio) :
19806                             (i * ElemRatio + (ElemRatio - 1));
19807 
19808     assert(Index < Ops.size() && "Invalid index");
19809     Ops[Index] = In;
19810   }
19811 
19812   // The type of the new BUILD_VECTOR node.
19813   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
19814   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
19815          "Invalid vector size");
19816   // Check if the new vector type is legal.
19817   if (!isTypeLegal(VecVT) ||
19818       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
19819        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
19820     return SDValue();
19821 
19822   // Make the new BUILD_VECTOR.
19823   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
19824 
19825   // The new BUILD_VECTOR node has the potential to be further optimized.
19826   AddToWorklist(BV.getNode());
19827   // Bitcast to the desired type.
19828   return DAG.getBitcast(VT, BV);
19829 }
19830 
19831 // Simplify (build_vec (trunc $1)
19832 //                     (trunc (srl $1 half-width))
19833 //                     (trunc (srl $1 (2 * half-width))) …)
19834 // to (bitcast $1)
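//
// For example, on a little-endian target (illustrative):
//   (v2i32 build_vector (trunc i64:x), (trunc (srl i64:x, 32)))
//   --> (v2i32 bitcast i64:x)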
19835 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
19836   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19837 
19838   // Only for little endian
19839   if (!DAG.getDataLayout().isLittleEndian())
19840     return SDValue();
19841 
19842   SDLoc DL(N);
19843   EVT VT = N->getValueType(0);
19844   EVT OutScalarTy = VT.getScalarType();
19845   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
19846 
  // Only handle power-of-two types to be sure that the bitcast works well.
19848   if (!isPowerOf2_64(ScalarTypeBitsize))
19849     return SDValue();
19850 
19851   unsigned NumInScalars = N->getNumOperands();
19852 
19853   // Look through bitcasts
19854   auto PeekThroughBitcast = [](SDValue Op) {
19855     if (Op.getOpcode() == ISD::BITCAST)
19856       return Op.getOperand(0);
19857     return Op;
19858   };
19859 
19860   // The source value where all the parts are extracted.
19861   SDValue Src;
19862   for (unsigned i = 0; i != NumInScalars; ++i) {
19863     SDValue In = PeekThroughBitcast(N->getOperand(i));
19864     // Ignore undef inputs.
19865     if (In.isUndef()) continue;
19866 
19867     if (In.getOpcode() != ISD::TRUNCATE)
19868       return SDValue();
19869 
19870     In = PeekThroughBitcast(In.getOperand(0));
19871 
19872     if (In.getOpcode() != ISD::SRL) {
      // For now only handle a build_vec without shuffling; handle shifts here
      // in the future.
19875       if (i != 0)
19876         return SDValue();
19877 
19878       Src = In;
19879     } else {
19880       // In is SRL
19881       SDValue part = PeekThroughBitcast(In.getOperand(0));
19882 
19883       if (!Src) {
19884         Src = part;
19885       } else if (Src != part) {
19886         // Vector parts do not stem from the same variable
19887         return SDValue();
19888       }
19889 
19890       SDValue ShiftAmtVal = In.getOperand(1);
19891       if (!isa<ConstantSDNode>(ShiftAmtVal))
19892         return SDValue();
19893 
19894       uint64_t ShiftAmt = In.getConstantOperandVal(1);
19895 
19896       // The extracted value is not extracted at the right position
19897       if (ShiftAmt != i * ScalarTypeBitsize)
19898         return SDValue();
19899     }
19900   }
19901 
19902   // Only cast if the size is the same
19903   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19904     return SDValue();
19905 
19906   return DAG.getBitcast(VT, Src);
19907 }
19908 
19909 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19910                                            ArrayRef<int> VectorMask,
19911                                            SDValue VecIn1, SDValue VecIn2,
19912                                            unsigned LeftIdx, bool DidSplitVec) {
19913   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19914 
19915   EVT VT = N->getValueType(0);
19916   EVT InVT1 = VecIn1.getValueType();
19917   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19918 
19919   unsigned NumElems = VT.getVectorNumElements();
19920   unsigned ShuffleNumElems = NumElems;
19921 
19922   // If we artificially split a vector in two already, then the offsets in the
19923   // operands will all be based off of VecIn1, even those in VecIn2.
19924   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19925 
19926   uint64_t VTSize = VT.getFixedSizeInBits();
19927   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19928   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19929 
19930   assert(InVT2Size <= InVT1Size &&
19931          "Inputs must be sorted to be in non-increasing vector size order.");
19932 
19933   // We can't generate a shuffle node with mismatched input and output types.
19934   // Try to make the types match the type of the output.
19935   if (InVT1 != VT || InVT2 != VT) {
19936     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
      // If the output vector length is a multiple of the (identical) input
      // vector lengths, we can concatenate them and pad the rest with undefs.
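      // E.g. (illustrative): for VT = v8f32 and two v2f32 inputs, NumConcats
      // is 4 and we form (concat_vectors VecIn1, VecIn2, undef, undef).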
19939       unsigned NumConcats = VTSize / InVT1Size;
19940       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19941       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19942       ConcatOps[0] = VecIn1;
19943       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19944       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19945       VecIn2 = SDValue();
19946     } else if (InVT1Size == VTSize * 2) {
19947       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19948         return SDValue();
19949 
19950       if (!VecIn2.getNode()) {
19951         // If we only have one input vector, and it's twice the size of the
19952         // output, split it in two.
19953         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19954                              DAG.getVectorIdxConstant(NumElems, DL));
19955         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19956         // Since we now have shorter input vectors, adjust the offset of the
19957         // second vector's start.
19958         Vec2Offset = NumElems;
19959       } else {
19960         assert(InVT2Size <= InVT1Size &&
19961                "Second input is not going to be larger than the first one.");
19962 
19963         // VecIn1 is wider than the output, and we have another, possibly
19964         // smaller input. Pad the smaller input with undefs, shuffle at the
19965         // input vector width, and extract the output.
19966         // The shuffle type is different than VT, so check legality again.
19967         if (LegalOperations &&
19968             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19969           return SDValue();
19970 
19971         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19972         // lower it back into a BUILD_VECTOR. So if the inserted type is
19973         // illegal, don't even try.
19974         if (InVT1 != InVT2) {
19975           if (!TLI.isTypeLegal(InVT2))
19976             return SDValue();
19977           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19978                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19979         }
19980         ShuffleNumElems = NumElems * 2;
19981       }
19982     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19983       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19984       ConcatOps[0] = VecIn2;
19985       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19986     } else {
19987       // TODO: Support cases where the length mismatch isn't exactly by a
19988       // factor of 2.
19989       // TODO: Move this check upwards, so that if we have bad type
19990       // mismatches, we don't create any DAG nodes.
19991       return SDValue();
19992     }
19993   }
19994 
19995   // Initialize mask to undef.
19996   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19997 
19998   // Only need to run up to the number of elements actually used, not the
19999   // total number of elements in the shuffle - if we are shuffling a wider
20000   // vector, the high lanes should be set to undef.
20001   for (unsigned i = 0; i != NumElems; ++i) {
20002     if (VectorMask[i] <= 0)
20003       continue;
20004 
20005     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
20006     if (VectorMask[i] == (int)LeftIdx) {
20007       Mask[i] = ExtIndex;
20008     } else if (VectorMask[i] == (int)LeftIdx + 1) {
20009       Mask[i] = Vec2Offset + ExtIndex;
20010     }
20011   }
20012 
  // The type of the input vectors may have changed above.
20014   InVT1 = VecIn1.getValueType();
20015 
20016   // If we already have a VecIn2, it should have the same type as VecIn1.
20017   // If we don't, get an undef/zero vector of the appropriate type.
20018   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
20019   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
20020 
20021   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
20022   if (ShuffleNumElems > NumElems)
20023     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
20024 
20025   return Shuffle;
20026 }
20027 
20028 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
20029   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
20030 
20031   // First, determine where the build vector is not undef.
20032   // TODO: We could extend this to handle zero elements as well as undefs.
20033   int NumBVOps = BV->getNumOperands();
20034   int ZextElt = -1;
20035   for (int i = 0; i != NumBVOps; ++i) {
20036     SDValue Op = BV->getOperand(i);
20037     if (Op.isUndef())
20038       continue;
20039     if (ZextElt == -1)
20040       ZextElt = i;
20041     else
20042       return SDValue();
20043   }
20044   // Bail out if there's no non-undef element.
20045   if (ZextElt == -1)
20046     return SDValue();
20047 
20048   // The build vector contains some number of undef elements and exactly
20049   // one other element. That other element must be a zero-extended scalar
20050   // extracted from a vector at a constant index to turn this into a shuffle.
20051   // Also, require that the build vector does not implicitly truncate/extend
20052   // its elements.
20053   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
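  //
  // E.g. (illustrative): with V : v4i8,
  //   (v2i16 build_vector (i16 zext (extractelt V, 2)), undef)
  //   --> bitcast (shuffle V, zerovec, <2,4,u,u>)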
20054   EVT VT = BV->getValueType(0);
20055   SDValue Zext = BV->getOperand(ZextElt);
20056   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
20057       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20058       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
20059       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
20060     return SDValue();
20061 
20062   // The zero-extend must be a multiple of the source size, and we must be
20063   // building a vector of the same size as the source of the extract element.
20064   SDValue Extract = Zext.getOperand(0);
20065   unsigned DestSize = Zext.getValueSizeInBits();
20066   unsigned SrcSize = Extract.getValueSizeInBits();
20067   if (DestSize % SrcSize != 0 ||
20068       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
20069     return SDValue();
20070 
20071   // Create a shuffle mask that will combine the extracted element with zeros
20072   // and undefs.
20073   int ZextRatio = DestSize / SrcSize;
20074   int NumMaskElts = NumBVOps * ZextRatio;
20075   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
20076   for (int i = 0; i != NumMaskElts; ++i) {
20077     if (i / ZextRatio == ZextElt) {
20078       // The low bits of the (potentially translated) extracted element map to
20079       // the source vector. The high bits map to zero. We will use a zero vector
20080       // as the 2nd source operand of the shuffle, so use the 1st element of
20081       // that vector (mask value is number-of-elements) for the high bits.
20082       if (i % ZextRatio == 0)
20083         ShufMask[i] = Extract.getConstantOperandVal(1);
20084       else
20085         ShufMask[i] = NumMaskElts;
20086     }
20087 
20088     // Undef elements of the build vector remain undef because we initialize
20089     // the shuffle mask with -1.
20090   }
20091 
20092   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
20093   // bitcast (shuffle V, ZeroVec, VectorMask)
20094   SDLoc DL(BV);
20095   EVT VecVT = Extract.getOperand(0).getValueType();
20096   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
20097   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20098   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
20099                                              ZeroVec, ShufMask, DAG);
20100   if (!Shuf)
20101     return SDValue();
20102   return DAG.getBitcast(VT, Shuf);
20103 }
20104 
20105 // FIXME: promote to STLExtras.
20106 template <typename R, typename T>
20107 static auto getFirstIndexOf(R &&Range, const T &Val) {
20108   auto I = find(Range, Val);
20109   if (I == Range.end())
20110     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
20111   return std::distance(Range.begin(), I);
20112 }
20113 
20114 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
20115 // operations. If the types of the vectors we're extracting from allow it,
20116 // turn this into a vector_shuffle node.
20117 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
20118   SDLoc DL(N);
20119   EVT VT = N->getValueType(0);
20120 
20121   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
20122   if (!isTypeLegal(VT))
20123     return SDValue();
20124 
20125   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
20126     return V;
20127 
20128   // May only combine to shuffle after legalize if shuffle is legal.
20129   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
20130     return SDValue();
20131 
20132   bool UsesZeroVector = false;
20133   unsigned NumElems = N->getNumOperands();
20134 
20135   // Record, for each element of the newly built vector, which input vector
20136   // that element comes from. -1 stands for undef, 0 for the zero vector,
20137   // and positive values for the input vectors.
20138   // VectorMask maps each element to its vector number, and VecIn maps vector
20139   // numbers to their initial SDValues.
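  // E.g. (illustrative): for
  //   (build_vector (extractelt A, 0), (extractelt B, 1), 0, undef)
  // we end up with VectorMask = [1, 2, 0, -1] and VecIn = [SDValue(), A, B].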
20140 
20141   SmallVector<int, 8> VectorMask(NumElems, -1);
20142   SmallVector<SDValue, 8> VecIn;
20143   VecIn.push_back(SDValue());
20144 
20145   for (unsigned i = 0; i != NumElems; ++i) {
20146     SDValue Op = N->getOperand(i);
20147 
20148     if (Op.isUndef())
20149       continue;
20150 
20151     // See if we can use a blend with a zero vector.
20152     // TODO: Should we generalize this to a blend with an arbitrary constant
20153     // vector?
20154     if (isNullConstant(Op) || isNullFPConstant(Op)) {
20155       UsesZeroVector = true;
20156       VectorMask[i] = 0;
20157       continue;
20158     }
20159 
20160     // Not an undef or zero. If the input is something other than an
20161     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
20162     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20163         !isa<ConstantSDNode>(Op.getOperand(1)))
20164       return SDValue();
20165     SDValue ExtractedFromVec = Op.getOperand(0);
20166 
20167     if (ExtractedFromVec.getValueType().isScalableVector())
20168       return SDValue();
20169 
20170     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
20171     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
20172       return SDValue();
20173 
20174     // All inputs must have the same element type as the output.
20175     if (VT.getVectorElementType() !=
20176         ExtractedFromVec.getValueType().getVectorElementType())
20177       return SDValue();
20178 
20179     // Have we seen this input vector before?
20180     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
20181     // a map back from SDValues to numbers isn't worth it.
20182     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
20183     if (Idx == -1) { // A new source vector?
20184       Idx = VecIn.size();
20185       VecIn.push_back(ExtractedFromVec);
20186     }
20187 
20188     VectorMask[i] = Idx;
20189   }
20190 
20191   // If we didn't find at least one input vector, bail out.
20192   if (VecIn.size() < 2)
20193     return SDValue();
20194 
  // If all the operands of BUILD_VECTOR extract from the same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
20199   bool DidSplitVec = false;
20200   if (VecIn.size() == 2) {
20201     unsigned MaxIndex = 0;
20202     unsigned NearestPow2 = 0;
20203     SDValue Vec = VecIn.back();
20204     EVT InVT = Vec.getValueType();
20205     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
20206 
20207     for (unsigned i = 0; i < NumElems; i++) {
20208       if (VectorMask[i] <= 0)
20209         continue;
20210       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
20211       IndexVec[i] = Index;
20212       MaxIndex = std::max(MaxIndex, Index);
20213     }
20214 
20215     NearestPow2 = PowerOf2Ceil(MaxIndex);
20216     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
20217         NumElems * 2 < NearestPow2) {
20218       unsigned SplitSize = NearestPow2 / 2;
20219       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
20220                                      InVT.getVectorElementType(), SplitSize);
20221       if (TLI.isTypeLegal(SplitVT) &&
20222           SplitSize + SplitVT.getVectorNumElements() <=
20223               InVT.getVectorNumElements()) {
20224         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20225                                      DAG.getVectorIdxConstant(SplitSize, DL));
20226         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20227                                      DAG.getVectorIdxConstant(0, DL));
20228         VecIn.pop_back();
20229         VecIn.push_back(VecIn1);
20230         VecIn.push_back(VecIn2);
20231         DidSplitVec = true;
20232 
20233         for (unsigned i = 0; i < NumElems; i++) {
20234           if (VectorMask[i] <= 0)
20235             continue;
20236           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20237         }
20238       }
20239     }
20240   }
20241 
  // Sort input vectors by decreasing vector element count,
  // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit" zero vector as-is.
20245   SmallVector<SDValue, 8> SortedVecIn(VecIn);
20246   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
20247                     [](const SDValue &a, const SDValue &b) {
20248                       return a.getValueType().getVectorNumElements() >
20249                              b.getValueType().getVectorNumElements();
20250                     });
20251 
  // We now also need to rebuild the VectorMask, because it referenced the
  // element order in VecIn, and we just sorted them.
20254   for (int &SourceVectorIndex : VectorMask) {
20255     if (SourceVectorIndex <= 0)
20256       continue;
20257     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
20258     assert(Idx > 0 && Idx < SortedVecIn.size() &&
20259            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20260     SourceVectorIndex = Idx;
20261   }
20262 
20263   VecIn = std::move(SortedVecIn);
20264 
20265   // TODO: Should this fire if some of the input vectors has illegal type (like
20266   // it does now), or should we let legalization run its course first?
20267 
20268   // Shuffle phase:
20269   // Take pairs of vectors, and shuffle them so that the result has elements
20270   // from these vectors in the correct places.
20271   // For example, given:
20272   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20273   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20274   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20275   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20276   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20277   // We will generate:
20278   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20279   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20280   SmallVector<SDValue, 4> Shuffles;
20281   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20282     unsigned LeftIdx = 2 * In + 1;
20283     SDValue VecLeft = VecIn[LeftIdx];
20284     SDValue VecRight =
20285         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20286 
20287     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
20288                                                 VecRight, LeftIdx, DidSplitVec))
20289       Shuffles.push_back(Shuffle);
20290     else
20291       return SDValue();
20292   }
20293 
20294   // If we need the zero vector as an "ingredient" in the blend tree, add it
20295   // to the list of shuffles.
20296   if (UsesZeroVector)
20297     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20298                                       : DAG.getConstantFP(0.0, DL, VT));
20299 
20300   // If we only have one shuffle, we're done.
20301   if (Shuffles.size() == 1)
20302     return Shuffles[0];
20303 
20304   // Update the vector mask to point to the post-shuffle vectors.
20305   for (int &Vec : VectorMask)
20306     if (Vec == 0)
20307       Vec = Shuffles.size() - 1;
20308     else
20309       Vec = (Vec - 1) / 2;
20310 
20311   // More than one shuffle. Generate a binary tree of blends, e.g. if from
20312   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20313   // generate:
20314   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20315   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20316   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20317   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20318   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20319   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20320   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20321 
20322   // Make sure the initial size of the shuffle list is even.
20323   if (Shuffles.size() % 2)
20324     Shuffles.push_back(DAG.getUNDEF(VT));
20325 
20326   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20327     if (CurSize % 2) {
20328       Shuffles[CurSize] = DAG.getUNDEF(VT);
20329       CurSize++;
20330     }
20331     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20332       int Left = 2 * In;
20333       int Right = 2 * In + 1;
20334       SmallVector<int, 8> Mask(NumElems, -1);
20335       for (unsigned i = 0; i != NumElems; ++i) {
20336         if (VectorMask[i] == Left) {
20337           Mask[i] = i;
20338           VectorMask[i] = In;
20339         } else if (VectorMask[i] == Right) {
20340           Mask[i] = i + NumElems;
20341           VectorMask[i] = In;
20342         }
20343       }
20344 
20345       Shuffles[In] =
20346           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
20347     }
20348   }
20349   return Shuffles[0];
20350 }
20351 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
20354 // TODO: Support sign extend?
20355 // TODO: Allow undef elements?
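//
// For example (illustrative):
//   (v2i32 build_vector (zext (extractelt v4i16:X, 2)),
//                       (zext (extractelt v4i16:X, 3)))
//   --> (v2i32 zero_extend (v2i16 extract_subvector X, 2))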
20356 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
20357   if (LegalOperations)
20358     return SDValue();
20359 
20360   EVT VT = N->getValueType(0);
20361 
20362   bool FoundZeroExtend = false;
20363   SDValue Op0 = N->getOperand(0);
20364   auto checkElem = [&](SDValue Op) -> int64_t {
20365     unsigned Opc = Op.getOpcode();
20366     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
20367     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
20368         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20369         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
20370       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
20371         return C->getZExtValue();
20372     return -1;
20373   };
20374 
  // Make sure the first element matches
  // (zext (extract_vector_elt X, C)).
  // The offset must be a constant multiple of the
  // known-minimum vector length of the result type.
20379   int64_t Offset = checkElem(Op0);
20380   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20381     return SDValue();
20382 
20383   unsigned NumElems = N->getNumOperands();
20384   SDValue In = Op0.getOperand(0).getOperand(0);
20385   EVT InSVT = In.getValueType().getScalarType();
20386   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20387 
20388   // Don't create an illegal input type after type legalization.
20389   if (LegalTypes && !TLI.isTypeLegal(InVT))
20390     return SDValue();
20391 
20392   // Ensure all the elements come from the same vector and are adjacent.
20393   for (unsigned i = 1; i != NumElems; ++i) {
20394     if ((Offset + i) != checkElem(N->getOperand(i)))
20395       return SDValue();
20396   }
20397 
20398   SDLoc DL(N);
20399   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20400                    Op0.getOperand(0).getOperand(1));
20401   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20402                      VT, In);
20403 }
20404 
20405 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20406   EVT VT = N->getValueType(0);
20407 
20408   // A vector built entirely of undefs is undef.
20409   if (ISD::allOperandsUndef(N))
20410     return DAG.getUNDEF(VT);
20411 
  // If this is a splat of a bitcast from another vector, change it to a
  // concat_vectors.
20414   // For example:
20415   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20416   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20417   //
20418   // If X is a build_vector itself, the concat can become a larger build_vector.
20419   // TODO: Maybe this is useful for non-splat too?
20420   if (!LegalOperations) {
20421     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20422       Splat = peekThroughBitcasts(Splat);
20423       EVT SrcVT = Splat.getValueType();
20424       if (SrcVT.isVector()) {
20425         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20426         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20427                                      SrcVT.getVectorElementType(), NumElts);
20428         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20429           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20430           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20431                                        NewVT, Ops);
20432           return DAG.getBitcast(VT, Concat);
20433         }
20434       }
20435     }
20436   }
20437 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
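  // For example (illustrative types):
  //   (v2i64 (build_vector (extract_vector_elt V, 2),
  //                        (extract_vector_elt V, 3))) -->
  //     (v2i64 (extract_subvector V, 2))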
20439   if (!LegalTypes && (N->getNumOperands() > 1)) {
20440     SDValue Op0 = N->getOperand(0);
20441     auto checkElem = [&](SDValue Op) -> uint64_t {
20442       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
20443           (Op0.getOperand(0) == Op.getOperand(0)))
20444         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
20445           return CNode->getZExtValue();
20446       return -1;
20447     };
20448 
20449     int Offset = checkElem(Op0);
20450     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20451       if (Offset + i != checkElem(N->getOperand(i))) {
20452         Offset = -1;
20453         break;
20454       }
20455     }
20456 
20457     if ((Offset == 0) &&
20458         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20459       return Op0.getOperand(0);
20460     if ((Offset != -1) &&
20461         ((Offset % N->getValueType(0).getVectorNumElements()) ==
20462          0)) // IDX must be multiple of output size.
20463       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20464                          Op0.getOperand(0), Op0.getOperand(1));
20465   }
20466 
20467   if (SDValue V = convertBuildVecZextToZext(N))
20468     return V;
20469 
20470   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20471     return V;
20472 
20473   if (SDValue V = reduceBuildVecTruncToBitCast(N))
20474     return V;
20475 
20476   if (SDValue V = reduceBuildVecToShuffle(N))
20477     return V;
20478 
20479   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20480   // Do this late as some of the above may replace the splat.
20481   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20482     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20483       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20484       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20485     }
20486 
20487   return SDValue();
20488 }
20489 
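// Fold a CONCAT_VECTORS of scalar-to-vector bitcasts (or undefs) into a
// bitcast of one wider BUILD_VECTOR. For example (illustrative types):
//   concat (v2i32 (bitcast (i64 X))), (v2i32 (bitcast (i64 Y))) -->
//     (v4i32 (bitcast (v2i64 (build_vector X, Y))))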
20490 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20491   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20492   EVT OpVT = N->getOperand(0).getValueType();
20493 
20494   // If the operands are legal vectors, leave them alone.
20495   if (TLI.isTypeLegal(OpVT))
20496     return SDValue();
20497 
20498   SDLoc DL(N);
20499   EVT VT = N->getValueType(0);
20500   SmallVector<SDValue, 8> Ops;
20501 
20502   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20503   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20504 
20505   // Keep track of what we encounter.
20506   bool AnyInteger = false;
20507   bool AnyFP = false;
20508   for (const SDValue &Op : N->ops()) {
20509     if (ISD::BITCAST == Op.getOpcode() &&
20510         !Op.getOperand(0).getValueType().isVector())
20511       Ops.push_back(Op.getOperand(0));
20512     else if (ISD::UNDEF == Op.getOpcode())
20513       Ops.push_back(ScalarUndef);
20514     else
20515       return SDValue();
20516 
    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out; it could be something weird like x86mmx.
20519     EVT LastOpVT = Ops.back().getValueType();
20520     if (LastOpVT.isFloatingPoint())
20521       AnyFP = true;
20522     else if (LastOpVT.isInteger())
20523       AnyInteger = true;
20524     else
20525       return SDValue();
20526   }
20527 
20528   // If any of the operands is a floating point scalar bitcast to a vector,
20529   // use floating point types throughout, and bitcast everything.
20530   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
20531   if (AnyFP) {
20532     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20533     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20534     if (AnyInteger) {
20535       for (SDValue &Op : Ops) {
20536         if (Op.getValueType() == SVT)
20537           continue;
20538         if (Op.isUndef())
20539           Op = ScalarUndef;
20540         else
20541           Op = DAG.getBitcast(SVT, Op);
20542       }
20543     }
20544   }
20545 
20546   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20547                                VT.getSizeInBits() / SVT.getSizeInBits());
20548   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20549 }
20550 
20551 // Attempt to merge nested concat_vectors/undefs.
20552 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20553 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20554 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20555                                                   SelectionDAG &DAG) {
20556   EVT VT = N->getValueType(0);
20557 
20558   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20559   EVT SubVT;
20560   SDValue FirstConcat;
20561   for (const SDValue &Op : N->ops()) {
20562     if (Op.isUndef())
20563       continue;
20564     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20565       return SDValue();
20566     if (!FirstConcat) {
20567       SubVT = Op.getOperand(0).getValueType();
20568       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20569         return SDValue();
20570       FirstConcat = Op;
20571       continue;
20572     }
20573     if (SubVT != Op.getOperand(0).getValueType())
20574       return SDValue();
20575   }
20576   assert(FirstConcat && "Concat of all-undefs found");
20577 
20578   SmallVector<SDValue> ConcatOps;
20579   for (const SDValue &Op : N->ops()) {
20580     if (Op.isUndef()) {
20581       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20582       continue;
20583     }
20584     ConcatOps.append(Op->op_begin(), Op->op_end());
20585   }
20586   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20587 }
20588 
20589 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20590 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20591 // most two distinct vectors the same size as the result, attempt to turn this
20592 // into a legal shuffle.
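// For example (illustrative v8i32 result with v4i32 extracts):
//   concat (extract_subvec V1, 4), (extract_subvec V2, 0) -->
//     shuffle V1, V2, <4,5,6,7,8,9,10,11>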
20593 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20594   EVT VT = N->getValueType(0);
20595   EVT OpVT = N->getOperand(0).getValueType();
20596 
20597   // We currently can't generate an appropriate shuffle for a scalable vector.
20598   if (VT.isScalableVector())
20599     return SDValue();
20600 
20601   int NumElts = VT.getVectorNumElements();
20602   int NumOpElts = OpVT.getVectorNumElements();
20603 
20604   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20605   SmallVector<int, 8> Mask;
20606 
20607   for (SDValue Op : N->ops()) {
20608     Op = peekThroughBitcasts(Op);
20609 
20610     // UNDEF nodes convert to UNDEF shuffle mask values.
20611     if (Op.isUndef()) {
20612       Mask.append((unsigned)NumOpElts, -1);
20613       continue;
20614     }
20615 
20616     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20617       return SDValue();
20618 
20619     // What vector are we extracting the subvector from and at what index?
20620     SDValue ExtVec = Op.getOperand(0);
20621     int ExtIdx = Op.getConstantOperandVal(1);
20622 
20623     // We want the EVT of the original extraction to correctly scale the
20624     // extraction index.
20625     EVT ExtVT = ExtVec.getValueType();
20626     ExtVec = peekThroughBitcasts(ExtVec);
20627 
20628     // UNDEF nodes convert to UNDEF shuffle mask values.
20629     if (ExtVec.isUndef()) {
20630       Mask.append((unsigned)NumOpElts, -1);
20631       continue;
20632     }
20633 
20634     // Ensure that we are extracting a subvector from a vector the same
20635     // size as the result.
20636     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20637       return SDValue();
20638 
20639     // Scale the subvector index to account for any bitcast.
20640     int NumExtElts = ExtVT.getVectorNumElements();
20641     if (0 == (NumExtElts % NumElts))
20642       ExtIdx /= (NumExtElts / NumElts);
20643     else if (0 == (NumElts % NumExtElts))
20644       ExtIdx *= (NumElts / NumExtElts);
20645     else
20646       return SDValue();
20647 
20648     // At most we can reference 2 inputs in the final shuffle.
20649     if (SV0.isUndef() || SV0 == ExtVec) {
20650       SV0 = ExtVec;
20651       for (int i = 0; i != NumOpElts; ++i)
20652         Mask.push_back(i + ExtIdx);
20653     } else if (SV1.isUndef() || SV1 == ExtVec) {
20654       SV1 = ExtVec;
20655       for (int i = 0; i != NumOpElts; ++i)
20656         Mask.push_back(i + ExtIdx + NumElts);
20657     } else {
20658       return SDValue();
20659     }
20660   }
20661 
20662   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20663   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20664                                      DAG.getBitcast(VT, SV1), Mask, DAG);
20665 }
20666 
20667 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20668   unsigned CastOpcode = N->getOperand(0).getOpcode();
20669   switch (CastOpcode) {
20670   case ISD::SINT_TO_FP:
20671   case ISD::UINT_TO_FP:
20672   case ISD::FP_TO_SINT:
20673   case ISD::FP_TO_UINT:
20674     // TODO: Allow more opcodes?
20675     //  case ISD::BITCAST:
20676     //  case ISD::TRUNCATE:
20677     //  case ISD::ZERO_EXTEND:
20678     //  case ISD::SIGN_EXTEND:
20679     //  case ISD::FP_EXTEND:
20680     break;
20681   default:
20682     return SDValue();
20683   }
20684 
20685   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20686   if (!SrcVT.isVector())
20687     return SDValue();
20688 
20689   // All operands of the concat must be the same kind of cast from the same
20690   // source type.
20691   SmallVector<SDValue, 4> SrcOps;
20692   for (SDValue Op : N->ops()) {
20693     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20694         Op.getOperand(0).getValueType() != SrcVT)
20695       return SDValue();
20696     SrcOps.push_back(Op.getOperand(0));
20697   }
20698 
  // The wider cast must be supported by the target. This is unusual because
  // the type used to check operation support depends on the opcode. In
  // addition, check the other type in the cast to make sure this is really
  // legal.
20702   EVT VT = N->getValueType(0);
20703   EVT SrcEltVT = SrcVT.getVectorElementType();
20704   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20705   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20706   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20707   switch (CastOpcode) {
20708   case ISD::SINT_TO_FP:
20709   case ISD::UINT_TO_FP:
20710     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20711         !TLI.isTypeLegal(VT))
20712       return SDValue();
20713     break;
20714   case ISD::FP_TO_SINT:
20715   case ISD::FP_TO_UINT:
20716     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
20717         !TLI.isTypeLegal(ConcatSrcVT))
20718       return SDValue();
20719     break;
20720   default:
20721     llvm_unreachable("Unexpected cast opcode");
20722   }
20723 
20724   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
20725   SDLoc DL(N);
20726   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
20727   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
20728 }
20729 
20730 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
20731   // If we only have one input vector, we don't need to do any concatenation.
20732   if (N->getNumOperands() == 1)
20733     return N->getOperand(0);
20734 
20735   // Check if all of the operands are undefs.
20736   EVT VT = N->getValueType(0);
20737   if (ISD::allOperandsUndef(N))
20738     return DAG.getUNDEF(VT);
20739 
20740   // Optimize concat_vectors where all but the first of the vectors are undef.
20741   if (all_of(drop_begin(N->ops()),
20742              [](const SDValue &Op) { return Op.isUndef(); })) {
20743     SDValue In = N->getOperand(0);
20744     assert(In.getValueType().isVector() && "Must concat vectors");
20745 
20746     // If the input is a concat_vectors, just make a larger concat by padding
20747     // with smaller undefs.
20748     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
20749       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
20750       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
20751       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
20752       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20753     }
20754 
20755     SDValue Scalar = peekThroughOneUseBitcasts(In);
20756 
20757     // concat_vectors(scalar_to_vector(scalar), undef) ->
20758     //     scalar_to_vector(scalar)
20759     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
20760          Scalar.hasOneUse()) {
20761       EVT SVT = Scalar.getValueType().getVectorElementType();
20762       if (SVT == Scalar.getOperand(0).getValueType())
20763         Scalar = Scalar.getOperand(0);
20764     }
20765 
20766     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
20767     if (!Scalar.getValueType().isVector()) {
20768       // If the bitcast type isn't legal, it might be a trunc of a legal type;
20769       // look through the trunc so we can still do the transform:
20770       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
20771       if (Scalar->getOpcode() == ISD::TRUNCATE &&
20772           !TLI.isTypeLegal(Scalar.getValueType()) &&
20773           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
20774         Scalar = Scalar->getOperand(0);
20775 
20776       EVT SclTy = Scalar.getValueType();
20777 
20778       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
20779         return SDValue();
20780 
20781       // Bail out if the vector size is not a multiple of the scalar size.
20782       if (VT.getSizeInBits() % SclTy.getSizeInBits())
20783         return SDValue();
20784 
20785       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
20786       if (VNTNumElms < 2)
20787         return SDValue();
20788 
20789       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
20790       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
20791         return SDValue();
20792 
20793       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
20794       return DAG.getBitcast(VT, Res);
20795     }
20796   }
20797 
20798   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
20799   // We have already tested above for an UNDEF only concatenation.
20800   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
20801   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
20802   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
20803     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
20804   };
20805   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
20806     SmallVector<SDValue, 8> Opnds;
20807     EVT SVT = VT.getScalarType();
20808 
20809     EVT MinVT = SVT;
20810     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, their operands may have
      // different types. Find the smallest type and truncate all operands to
      // it.
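      // For example, if one BUILD_VECTOR has i32 operands and another has
      // i16 operands, every operand is truncated to i16 here.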
20813       bool FoundMinVT = false;
20814       for (const SDValue &Op : N->ops())
20815         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20816           EVT OpSVT = Op.getOperand(0).getValueType();
20817           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
20818           FoundMinVT = true;
20819         }
20820       assert(FoundMinVT && "Concat vector type mismatch");
20821     }
20822 
20823     for (const SDValue &Op : N->ops()) {
20824       EVT OpVT = Op.getValueType();
20825       unsigned NumElts = OpVT.getVectorNumElements();
20826 
20827       if (ISD::UNDEF == Op.getOpcode())
20828         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
20829 
20830       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
20831         if (SVT.isFloatingPoint()) {
20832           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
20833           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
20834         } else {
20835           for (unsigned i = 0; i != NumElts; ++i)
20836             Opnds.push_back(
20837                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
20838         }
20839       }
20840     }
20841 
20842     assert(VT.getVectorNumElements() == Opnds.size() &&
20843            "Concat vector type mismatch");
20844     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
20845   }
20846 
20847   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
20848   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
20849   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
20850     return V;
20851 
20852   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
20853     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
20854     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
20855       return V;
20856 
20857     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
20858     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
20859       return V;
20860   }
20861 
20862   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
20863     return V;
20864 
  // Type legalization of vectors and DAG canonicalization of VECTOR_SHUFFLE
  // nodes often generate nop CONCAT_VECTORS nodes. Scan the CONCAT_VECTORS
  // operands and look for CONCAT operations that place the incoming vectors
  // at the exact same location.
20869   //
20870   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
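  //
  // For example (with v4 operands and V of the same type as the result):
  //   concat (extract_subvec V, 0), (extract_subvec V, 4) --> V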
20871   SDValue SingleSource = SDValue();
20872   unsigned PartNumElem =
20873       N->getOperand(0).getValueType().getVectorMinNumElements();
20874 
20875   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20876     SDValue Op = N->getOperand(i);
20877 
20878     if (Op.isUndef())
20879       continue;
20880 
20881     // Check if this is the identity extract:
20882     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20883       return SDValue();
20884 
20885     // Find the single incoming vector for the extract_subvector.
20886     if (SingleSource.getNode()) {
20887       if (Op.getOperand(0) != SingleSource)
20888         return SDValue();
20889     } else {
20890       SingleSource = Op.getOperand(0);
20891 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot optimize it
      // away.
20895       if (SingleSource.getValueType() != N->getValueType(0))
20896         return SDValue();
20897     }
20898 
20899     // Check that we are reading from the identity index.
20900     unsigned IdentityIndex = i * PartNumElem;
20901     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
20902       return SDValue();
20903   }
20904 
20905   if (SingleSource.getNode())
20906     return SingleSource;
20907 
20908   return SDValue();
20909 }
20910 
20911 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
20912 // if the subvector can be sourced for free.
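// For example, with SubVT = v4i32 and Index = 4:
//   (insert_subvector ?, X, 4)    --> X
//   (concat_vectors A, B, C, D)   --> B   (if each operand is v4i32)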
20913 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
20914   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
20915       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
20916     return V.getOperand(1);
20917   }
20918   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
20919   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
20920       V.getOperand(0).getValueType() == SubVT &&
20921       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
20922     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
20923     return V.getOperand(SubIdx);
20924   }
20925   return SDValue();
20926 }
20927 
20928 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
20929                                               SelectionDAG &DAG,
20930                                               bool LegalOperations) {
20931   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20932   SDValue BinOp = Extract->getOperand(0);
20933   unsigned BinOpcode = BinOp.getOpcode();
20934   if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
20935     return SDValue();
20936 
20937   EVT VecVT = BinOp.getValueType();
20938   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
20939   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
20940     return SDValue();
20941 
20942   SDValue Index = Extract->getOperand(1);
20943   EVT SubVT = Extract->getValueType(0);
20944   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
20945     return SDValue();
20946 
20947   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
20948   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
20949 
20950   // TODO: We could handle the case where only 1 operand is being inserted by
20951   //       creating an extract of the other operand, but that requires checking
20952   //       number of uses and/or costs.
20953   if (!Sub0 || !Sub1)
20954     return SDValue();
20955 
20956   // We are inserting both operands of the wide binop only to extract back
20957   // to the narrow vector size. Eliminate all of the insert/extract:
20958   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
20959   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
20960                      BinOp->getFlags());
20961 }
20962 
20963 /// If we are extracting a subvector produced by a wide binary operator try
20964 /// to use a narrow binary operator and/or avoid concatenation and extraction.
20965 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
20966                                           bool LegalOperations) {
20967   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
20968   // some of these bailouts with other transforms.
20969 
20970   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20971     return V;
20972 
20973   // The extract index must be a constant, so we can map it to a concat operand.
20974   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20975   if (!ExtractIndexC)
20976     return SDValue();
20977 
20978   // We are looking for an optionally bitcasted wide vector binary operator
20979   // feeding an extract subvector.
20980   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20981   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20982   unsigned BOpcode = BinOp.getOpcode();
20983   if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
20984     return SDValue();
20985 
20986   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20987   // reduced to the unary fneg when it is visited, and we probably want to deal
20988   // with fneg in a target-specific way.
20989   if (BOpcode == ISD::FSUB) {
20990     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20991     if (C && C->getValueAPF().isNegZero())
20992       return SDValue();
20993   }
20994 
20995   // The binop must be a vector type, so we can extract some fraction of it.
20996   EVT WideBVT = BinOp.getValueType();
20997   // The optimisations below currently assume we are dealing with fixed length
20998   // vectors. It is possible to add support for scalable vectors, but at the
20999   // moment we've done no analysis to prove whether they are profitable or not.
21000   if (!WideBVT.isFixedLengthVector())
21001     return SDValue();
21002 
21003   EVT VT = Extract->getValueType(0);
21004   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
21005   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
21006          "Extract index is not a multiple of the vector length.");
21007 
21008   // Bail out if this is not a proper multiple width extraction.
21009   unsigned WideWidth = WideBVT.getSizeInBits();
21010   unsigned NarrowWidth = VT.getSizeInBits();
21011   if (WideWidth % NarrowWidth != 0)
21012     return SDValue();
21013 
21014   // Bail out if we are extracting a fraction of a single operation. This can
21015   // occur because we potentially looked through a bitcast of the binop.
21016   unsigned NarrowingRatio = WideWidth / NarrowWidth;
21017   unsigned WideNumElts = WideBVT.getVectorNumElements();
21018   if (WideNumElts % NarrowingRatio != 0)
21019     return SDValue();
21020 
21021   // Bail out if the target does not support a narrower version of the binop.
21022   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
21023                                    WideNumElts / NarrowingRatio);
21024   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
21025     return SDValue();
21026 
21027   // If extraction is cheap, we don't need to look at the binop operands
21028   // for concat ops. The narrow binop alone makes this transform profitable.
21029   // We can't just reuse the original extract index operand because we may have
21030   // bitcasted.
21031   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
21032   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
21033   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
21034       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
21035     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
21036     SDLoc DL(Extract);
21037     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21038     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21039                             BinOp.getOperand(0), NewExtIndex);
21040     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21041                             BinOp.getOperand(1), NewExtIndex);
21042     SDValue NarrowBinOp =
21043         DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
21044     return DAG.getBitcast(VT, NarrowBinOp);
21045   }
21046 
21047   // Only handle the case where we are doubling and then halving. A larger ratio
21048   // may require more than two narrow binops to replace the wide binop.
21049   if (NarrowingRatio != 2)
21050     return SDValue();
21051 
21052   // TODO: The motivating case for this transform is an x86 AVX1 target. That
21053   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
21054   // flavors, but no other 256-bit integer support. This could be extended to
21055   // handle any binop, but that may require fixing/adding other folds to avoid
21056   // codegen regressions.
21057   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
21058     return SDValue();
21059 
21060   // We need at least one concatenation operation of a binop operand to make
21061   // this transform worthwhile. The concat must double the input vector sizes.
21062   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
21063     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
21064       return V.getOperand(ConcatOpNum);
21065     return SDValue();
21066   };
21067   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
21068   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
21069 
21070   if (SubVecL || SubVecR) {
21071     // If a binop operand was not the result of a concat, we must extract a
21072     // half-sized operand for our new narrow binop:
21073     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
21074     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
21075     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
21076     SDLoc DL(Extract);
21077     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21078     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
21079                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21080                                       BinOp.getOperand(0), IndexC);
21081 
21082     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
21083                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21084                                       BinOp.getOperand(1), IndexC);
21085 
21086     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
21087     return DAG.getBitcast(VT, NarrowBinOp);
21088   }
21089 
21090   return SDValue();
21091 }
21092 
21093 /// If we are extracting a subvector from a wide vector load, convert to a
21094 /// narrow load to eliminate the extraction:
21095 /// (extract_subvector (load wide vector)) --> (load narrow vector)
21096 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
21097   // TODO: Add support for big-endian. The offset calculation must be adjusted.
21098   if (DAG.getDataLayout().isBigEndian())
21099     return SDValue();
21100 
21101   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
21102   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
21103     return SDValue();
21104 
  EVT VT = Extract->getValueType(0);
21107 
21108   // We can only create byte sized loads.
21109   if (!VT.isByteSized())
21110     return SDValue();
21111 
21112   unsigned Index = Extract->getConstantOperandVal(1);
21113   unsigned NumElts = VT.getVectorMinNumElements();
21114 
21115   // The definition of EXTRACT_SUBVECTOR states that the index must be a
21116   // multiple of the minimum number of elements in the result type.
21117   assert(Index % NumElts == 0 && "The extract subvector index is not a "
21118                                  "multiple of the result's element count");
21119 
21120   // It's fine to use TypeSize here as we know the offset will not be negative.
21121   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
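  // For example, extracting the high v4i32 half of a loaded v8i32
  // (Index == 4) gives Offset = 16 bytes * (4 / 4) = 16 bytes.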
21122 
  // Allow targets to opt out of the narrowed load.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
21125     return SDValue();
21126 
21127   // The narrow load will be offset from the base address of the old load if
21128   // we are extracting from something besides index 0 (little-endian).
21129   SDLoc DL(Extract);
21130 
21131   // TODO: Use "BaseIndexOffset" to make this more effective.
21132   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
21133 
21134   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
21135   MachineFunction &MF = DAG.getMachineFunction();
21136   MachineMemOperand *MMO;
21137   if (Offset.isScalable()) {
21138     MachinePointerInfo MPI =
21139         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
21140     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
21141   } else
21142     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
21143                                   StoreSize);
21144 
21145   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
21146   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
21147   return NewLd;
21148 }
21149 
/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
///                               EXTRACT_SUBVECTOR(Op?, ?),
///                               Mask')
21154 /// iff it is legal and profitable to do so. Notably, the trimmed mask
21155 /// (containing only the elements that are extracted)
21156 /// must reference at most two subvectors.
21157 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
21158                                                      SelectionDAG &DAG,
21159                                                      const TargetLowering &TLI,
21160                                                      bool LegalOperations) {
21161   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21162          "Must only be called on EXTRACT_SUBVECTOR's");
21163 
21164   SDValue N0 = N->getOperand(0);
21165 
21166   // Only deal with non-scalable vectors.
21167   EVT NarrowVT = N->getValueType(0);
21168   EVT WideVT = N0.getValueType();
21169   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
21170     return SDValue();
21171 
21172   // The operand must be a shufflevector.
21173   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
21174   if (!WideShuffleVector)
21175     return SDValue();
21176 
  // The old shuffle needs to go away.
21178   if (!WideShuffleVector->hasOneUse())
21179     return SDValue();
21180 
21181   // And the narrow shufflevector that we'll form must be legal.
21182   if (LegalOperations &&
21183       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
21184     return SDValue();
21185 
21186   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
21187   int NumEltsExtracted = NarrowVT.getVectorNumElements();
21188   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
21189          "Extract index is not a multiple of the output vector length.");
21190 
21191   int WideNumElts = WideVT.getVectorNumElements();
21192 
21193   SmallVector<int, 16> NewMask;
21194   NewMask.reserve(NumEltsExtracted);
21195   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
21196       DemandedSubvectors;
21197 
  // Try to decode the wide mask into a narrow mask from at most two
  // subvectors.
21199   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
21200                                                   NumEltsExtracted)) {
21201     assert((M >= -1) && (M < (2 * WideNumElts)) &&
21202            "Out-of-bounds shuffle mask?");
21203 
21204     if (M < 0) {
21205       // Does not depend on operands, does not require adjustment.
21206       NewMask.emplace_back(M);
21207       continue;
21208     }
21209 
21210     // From which operand of the shuffle does this shuffle mask element pick?
21211     int WideShufOpIdx = M / WideNumElts;
21212     // Which element of that operand is picked?
21213     int OpEltIdx = M % WideNumElts;
21214 
21215     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
21216            "Shuffle mask vector decomposition failure.");
21217 
21218     // And which NumEltsExtracted-sized subvector of that operand is that?
21219     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
21220     // And which element within that subvector of that operand is that?
21221     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
21222 
21223     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
21224            "Shuffle mask subvector decomposition failure.");
21225 
21226     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
21227             WideShufOpIdx * WideNumElts) == M &&
21228            "Shuffle mask full decomposition failure.");
21229 
21230     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
21231 
21232     if (Op.isUndef()) {
      // Picking from an undef operand. Let's adjust the mask instead.
21234       NewMask.emplace_back(-1);
21235       continue;
21236     }
21237 
21238     // Profitability check: only deal with extractions from the first subvector.
21239     if (OpSubvecIdx != 0)
21240       return SDValue();
21241 
21242     const std::pair<SDValue, int> DemandedSubvector =
21243         std::make_pair(Op, OpSubvecIdx);
21244 
21245     if (DemandedSubvectors.insert(DemandedSubvector)) {
21246       if (DemandedSubvectors.size() > 2)
21247         return SDValue(); // We can't handle more than two subvectors.
21248       // How many elements into the WideVT does this subvector start?
21249       int Index = NumEltsExtracted * OpSubvecIdx;
21250       // Bail out if the extraction isn't going to be cheap.
21251       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21252         return SDValue();
21253     }
21254 
21255     // Ok, but from which operand of the new shuffle will this element pick?
21256     int NewOpIdx =
21257         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
21258     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21259 
21260     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21261     NewMask.emplace_back(AdjM);
21262   }
21263   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21264   assert(DemandedSubvectors.size() <= 2 &&
21265          "Should have ended up demanding at most two subvectors.");
21266 
21267   // Did we discover that the shuffle does not actually depend on operands?
21268   if (DemandedSubvectors.empty())
21269     return DAG.getUNDEF(NarrowVT);
21270 
  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
  // operand[s]/index[es], so there is no point in checking its legality.
21273 
21274   // Do not turn a legal shuffle into an illegal one.
21275   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21276       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
21277     return SDValue();
21278 
21279   SDLoc DL(N);
21280 
21281   SmallVector<SDValue, 2> NewOps;
21282   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21283            &DemandedSubvector : DemandedSubvectors) {
21284     // How many elements into the WideVT does this subvector start?
21285     int Index = NumEltsExtracted * DemandedSubvector.second;
21286     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21287     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21288                                     DemandedSubvector.first, IndexC));
21289   }
21290   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21291          "Should end up with either one or two ops");
21292 
21293   // If we ended up with only one operand, pad with an undef.
21294   if (NewOps.size() == 1)
21295     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21296 
21297   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21298 }
21299 
21300 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21301   EVT NVT = N->getValueType(0);
21302   SDValue V = N->getOperand(0);
21303   uint64_t ExtIdx = N->getConstantOperandVal(1);
21304 
21305   // Extract from UNDEF is UNDEF.
21306   if (V.isUndef())
21307     return DAG.getUNDEF(NVT);
21308 
21309   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
21310     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21311       return NarrowLoad;
21312 
21313   // Combine an extract of an extract into a single extract_subvector.
21314   // ext (ext X, C), 0 --> ext X, C
21315   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21316     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21317                                     V.getConstantOperandVal(1)) &&
21318         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
21319       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21320                          V.getOperand(1));
21321     }
21322   }
21323 
  // ty1 extract_subvector (ty2 splat(V)) -> ty1 splat(V)
21325   if (V.getOpcode() == ISD::SPLAT_VECTOR)
21326     if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
21327       if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
21328         return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
21329 
21330   // Try to move vector bitcast after extract_subv by scaling extraction index:
21331   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
21332   if (V.getOpcode() == ISD::BITCAST &&
21333       V.getOperand(0).getValueType().isVector() &&
21334       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
21335     SDValue SrcOp = V.getOperand(0);
21336     EVT SrcVT = SrcOp.getValueType();
21337     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
21338     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
21339     if ((SrcNumElts % DestNumElts) == 0) {
21340       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
21341       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
21342       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
21343                                       NewExtEC);
21344       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21345         SDLoc DL(N);
21346         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
21347         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21348                                          V.getOperand(0), NewIndex);
21349         return DAG.getBitcast(NVT, NewExtract);
21350       }
21351     }
21352     if ((DestNumElts % SrcNumElts) == 0) {
21353       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
21354       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
21355         ElementCount NewExtEC =
21356             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
21357         EVT ScalarVT = SrcVT.getScalarType();
21358         if ((ExtIdx % DestSrcRatio) == 0) {
21359           SDLoc DL(N);
21360           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
21361           EVT NewExtVT =
21362               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
21363           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
21364             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21365             SDValue NewExtract =
21366                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
21367                             V.getOperand(0), NewIndex);
21368             return DAG.getBitcast(NVT, NewExtract);
21369           }
21370           if (NewExtEC.isScalar() &&
21371               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
21372             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
21373             SDValue NewExtract =
21374                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
21375                             V.getOperand(0), NewIndex);
21376             return DAG.getBitcast(NVT, NewExtract);
21377           }
21378         }
21379       }
21380     }
21381   }
21382 
21383   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
21384     unsigned ExtNumElts = NVT.getVectorMinNumElements();
21385     EVT ConcatSrcVT = V.getOperand(0).getValueType();
21386     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
21387            "Concat and extract subvector do not change element type");
21388     assert((ExtIdx % ExtNumElts) == 0 &&
21389            "Extract index is not a multiple of the input vector length.");
21390 
21391     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
21392     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
21393 
21394     // If the concatenated source types match this extract, it's a direct
21395     // simplification:
21396     // extract_subvec (concat V1, V2, ...), i --> Vi
21397     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
21398       return V.getOperand(ConcatOpIdx);
21399 
21400     // If the concatenated source vectors are a multiple length of this extract,
21401     // then extract a fraction of one of those source vectors directly from a
21402     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec v8i8 Y, 6
21405     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
21406         ConcatSrcNumElts % ExtNumElts == 0) {
21407       SDLoc DL(N);
21408       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
21409       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
21410              "Trying to extract from >1 concat operand?");
21411       assert(NewExtIdx % ExtNumElts == 0 &&
21412              "Extract index is not a multiple of the input vector length.");
21413       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
21414       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
21415                          V.getOperand(ConcatOpIdx), NewIndexC);
21416     }
21417   }
21418 
21419   if (SDValue V =
21420           foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
21421     return V;
21422 
21423   V = peekThroughBitcasts(V);
21424 
  // If the input is a build vector, try to make a smaller build vector.
21426   if (V.getOpcode() == ISD::BUILD_VECTOR) {
21427     EVT InVT = V.getValueType();
21428     unsigned ExtractSize = NVT.getSizeInBits();
21429     unsigned EltSize = InVT.getScalarSizeInBits();
21430     // Only do this if we won't split any elements.
21431     if (ExtractSize % EltSize == 0) {
21432       unsigned NumElems = ExtractSize / EltSize;
21433       EVT EltVT = InVT.getVectorElementType();
21434       EVT ExtractVT =
21435           NumElems == 1 ? EltVT
21436                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
21437       if ((Level < AfterLegalizeDAG ||
21438            (NumElems == 1 ||
21439             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
21440           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
21441         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
21442 
21443         if (NumElems == 1) {
21444           SDValue Src = V->getOperand(IdxVal);
          if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
21447           return DAG.getBitcast(NVT, Src);
21448         }
21449 
21450         // Extract the pieces from the original build_vector.
21451         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
21452                                               V->ops().slice(IdxVal, NumElems));
21453         return DAG.getBitcast(NVT, BuildVec);
21454       }
21455     }
21456   }
21457 
21458   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
21459     // Handle only simple case where vector being inserted and vector
21460     // being extracted are of same size.
21461     EVT SmallVT = V.getOperand(1).getValueType();
21462     if (!NVT.bitsEq(SmallVT))
21463       return SDValue();
21464 
21465     // Combine:
21466     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
21467     // Into:
21468     //    indices are equal or bit offsets are equal => V1
21469     //    otherwise => (extract_subvec V1, ExtIdx)
21470     uint64_t InsIdx = V.getConstantOperandVal(2);
21471     if (InsIdx * SmallVT.getScalarSizeInBits() ==
21472         ExtIdx * NVT.getScalarSizeInBits()) {
21473       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
21474         return SDValue();
21475 
21476       return DAG.getBitcast(NVT, V.getOperand(1));
21477     }
21478     return DAG.getNode(
21479         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
21480         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
21481         N->getOperand(1));
21482   }
21483 
21484   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
21485     return NarrowBOp;
21486 
21487   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21488     return SDValue(N, 0);
21489 
21490   return SDValue();
21491 }
21492 
21493 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
21494 /// followed by concatenation. Narrow vector ops may have better performance
21495 /// than wide ops, and this can unlock further narrowing of other vector ops.
21496 /// Targets can invert this transform later if it is not profitable.
21497 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
21498                                          SelectionDAG &DAG) {
21499   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
21500   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
21501       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
21502       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
21503     return SDValue();
21504 
21505   // Split the wide shuffle mask into halves. Any mask element that is accessing
21506   // operand 1 is offset down to account for narrowing of the vectors.
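  // For example, with v8i32 inputs (HalfNumElts == 4), a wide mask element
  // of 8 (the first element of Y) becomes 8 - 4 = 4 in the narrow mask,
  // i.e. element 0 of operand 1 (Y).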
21507   ArrayRef<int> Mask = Shuf->getMask();
21508   EVT VT = Shuf->getValueType(0);
21509   unsigned NumElts = VT.getVectorNumElements();
21510   unsigned HalfNumElts = NumElts / 2;
21511   SmallVector<int, 16> Mask0(HalfNumElts, -1);
21512   SmallVector<int, 16> Mask1(HalfNumElts, -1);
21513   for (unsigned i = 0; i != NumElts; ++i) {
21514     if (Mask[i] == -1)
21515       continue;
21516     // If we reference the upper (undef) subvector then the element is undef.
21517     if ((Mask[i] % NumElts) >= HalfNumElts)
21518       continue;
21519     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
21520     if (i < HalfNumElts)
21521       Mask0[i] = M;
21522     else
21523       Mask1[i - HalfNumElts] = M;
21524   }
21525 
21526   // Ask the target if this is a valid transform.
21527   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21528   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
21529                                 HalfNumElts);
21530   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
21531       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
21532     return SDValue();
21533 
21534   // shuffle (concat X, undef), (concat Y, undef), Mask -->
21535   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
21536   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
21537   SDLoc DL(Shuf);
21538   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
21539   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
21540   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
21541 }
21542 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or to turn a shuffle of a single concat into a simpler shuffle followed
// by a concat.
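// For example (with v4 concat operands and a v8 result):
//   shuffle (concat A, B), (concat C, D), <0,1,2,3,8,9,10,11> -->
//     concat A, C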
21545 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
21546   EVT VT = N->getValueType(0);
21547   unsigned NumElts = VT.getVectorNumElements();
21548 
21549   SDValue N0 = N->getOperand(0);
21550   SDValue N1 = N->getOperand(1);
21551   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
21552   ArrayRef<int> Mask = SVN->getMask();
21553 
21554   SmallVector<SDValue, 4> Ops;
21555   EVT ConcatVT = N0.getOperand(0).getValueType();
21556   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
21557   unsigned NumConcats = NumElts / NumElemsPerConcat;
21558 
21559   auto IsUndefMaskElt = [](int i) { return i == -1; };
21560 
21561   // Special case: shuffle(concat(A,B)) can be more efficiently represented
21562   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
21563   // half vector elements.
21564   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
21565       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
21566                    IsUndefMaskElt)) {
21567     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
21568                               N0.getOperand(1),
21569                               Mask.slice(0, NumElemsPerConcat));
21570     N1 = DAG.getUNDEF(ConcatVT);
21571     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
21572   }
21573 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
21576   for (unsigned I = 0; I != NumConcats; ++I) {
21577     unsigned Begin = I * NumElemsPerConcat;
21578     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
21579 
21580     // Make sure we're dealing with a copy.
21581     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
21582       Ops.push_back(DAG.getUNDEF(ConcatVT));
21583       continue;
21584     }
21585 
21586     int OpIdx = -1;
21587     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
21588       if (IsUndefMaskElt(SubMask[i]))
21589         continue;
21590       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
21591         return SDValue();
21592       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
21593       if (0 <= OpIdx && EltOpIdx != OpIdx)
21594         return SDValue();
21595       OpIdx = EltOpIdx;
21596     }
21597     assert(0 <= OpIdx && "Unknown concat_vectors op");
21598 
21599     if (OpIdx < (int)N0.getNumOperands())
21600       Ops.push_back(N0.getOperand(OpIdx));
21601     else
21602       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
21603   }
21604 
21605   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21606 }
21607 
21608 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21609 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21610 //
21611 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
21612 // a simplification in some sense, but it isn't appropriate in general: some
21613 // BUILD_VECTORs are substantially cheaper than others. The general case
21614 // of a BUILD_VECTOR requires inserting each element individually (or
21615 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
21616 // all constants is a single constant pool load.  A BUILD_VECTOR where each
21617 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
21618 // are undef lowers to a small number of element insertions.
21619 //
21620 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
21621 // We don't fold shuffles where one side is a non-zero constant, and we don't
21622 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
21623 // non-constant operands. This seems to work out reasonably well in practice.
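// For example (with non-constant A, B, C, D):
//   shuffle (build_vector A, B), (build_vector C, D), <0,3> -->
//     build_vector A, D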
21624 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
21625                                        SelectionDAG &DAG,
21626                                        const TargetLowering &TLI) {
21627   EVT VT = SVN->getValueType(0);
21628   unsigned NumElts = VT.getVectorNumElements();
21629   SDValue N0 = SVN->getOperand(0);
21630   SDValue N1 = SVN->getOperand(1);
21631 
21632   if (!N0->hasOneUse())
21633     return SDValue();
21634 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
21637   if (!N1.isUndef()) {
21638     if (!N1->hasOneUse())
21639       return SDValue();
21640 
21641     bool N0AnyConst = isAnyConstantBuildVector(N0);
21642     bool N1AnyConst = isAnyConstantBuildVector(N1);
21643     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
21644       return SDValue();
21645     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
21646       return SDValue();
21647   }
21648 
21649   // If both inputs are splats of the same value then we can safely merge this
21650   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
21651   bool IsSplat = false;
21652   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
21653   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
21654   if (BV0 && BV1)
21655     if (SDValue Splat0 = BV0->getSplatValue())
21656       IsSplat = (Splat0 == BV1->getSplatValue());
21657 
21658   SmallVector<SDValue, 8> Ops;
21659   SmallSet<SDValue, 16> DuplicateOps;
21660   for (int M : SVN->getMask()) {
21661     SDValue Op = DAG.getUNDEF(VT.getScalarType());
21662     if (M >= 0) {
21663       int Idx = M < (int)NumElts ? M : M - NumElts;
21664       SDValue &S = (M < (int)NumElts ? N0 : N1);
21665       if (S.getOpcode() == ISD::BUILD_VECTOR) {
21666         Op = S.getOperand(Idx);
21667       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
21668         SDValue Op0 = S.getOperand(0);
21669         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
21670       } else {
21671         // Operand can't be combined - bail out.
21672         return SDValue();
21673       }
21674     }
21675 
21676     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
21677     // generating a splat; semantically, this is fine, but it's likely to
21678     // generate low-quality code if the target can't reconstruct an appropriate
21679     // shuffle.
21680     if (!Op.isUndef() && !isIntOrFPConstant(Op))
21681       if (!IsSplat && !DuplicateOps.insert(Op).second)
21682         return SDValue();
21683 
21684     Ops.push_back(Op);
21685   }
21686 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
  // widest type and extend them all.
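  // e.g. if Ops contains both i8 and i32 elements, SVT becomes i32 and the
  // i8 elements are zero- or sign-extended to i32 below.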
21689   EVT SVT = VT.getScalarType();
21690   if (SVT.isInteger())
21691     for (SDValue &Op : Ops)
21692       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
21693   if (SVT != VT.getScalarType())
21694     for (SDValue &Op : Ops)
21695       Op = Op.isUndef() ? DAG.getUNDEF(SVT)
21696                         : (TLI.isZExtFree(Op.getValueType(), SVT)
21697                                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
21698                                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
21699   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
21700 }
21701 
21702 // Match shuffles that can be converted to any_vector_extend_in_reg.
21703 // This is often generated during legalization.
21704 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
21705 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
21706 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
21707                                             SelectionDAG &DAG,
21708                                             const TargetLowering &TLI,
21709                                             bool LegalOperations) {
21710   EVT VT = SVN->getValueType(0);
21711   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21712 
21713   // TODO Add support for big-endian when we have a test case.
21714   if (!VT.isInteger() || IsBigEndian)
21715     return SDValue();
21716 
21717   unsigned NumElts = VT.getVectorNumElements();
21718   unsigned EltSizeInBits = VT.getScalarSizeInBits();
21719   ArrayRef<int> Mask = SVN->getMask();
21720   SDValue N0 = SVN->getOperand(0);
21721 
21722   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
21723   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
21724     for (unsigned i = 0; i != NumElts; ++i) {
21725       if (Mask[i] < 0)
21726         continue;
21727       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
21728         continue;
21729       return false;
21730     }
21731     return true;
21732   };
21733 
  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
  // power-of-2 extensions as they are the most likely.
21736   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non-power-of-2 vector sizes.
21738     if (NumElts % Scale != 0)
21739       continue;
21740     if (!isAnyExtend(Scale))
21741       continue;
21742 
21743     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
21744     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
21745     // Never create an illegal type. Only create unsupported operations if we
21746     // are pre-legalization.
21747     if (TLI.isTypeLegal(OutVT))
21748       if (!LegalOperations ||
21749           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
21750         return DAG.getBitcast(VT,
21751                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
21752                                           SDLoc(SVN), OutVT, N0));
21753   }
21754 
21755   return SDValue();
21756 }
21757 
21758 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
21759 // each source element of a large type into the lowest elements of a smaller
21760 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
21763 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
21764                                         SelectionDAG &DAG) {
21765   EVT VT = SVN->getValueType(0);
21766   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
21767 
21768   // TODO Add support for big-endian when we have a test case.
21769   if (!VT.isInteger() || IsBigEndian)
21770     return SDValue();
21771 
21772   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
21773 
21774   unsigned Opcode = N0.getOpcode();
21775   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
21776       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
21777       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
21778     return SDValue();
21779 
21780   SDValue N00 = N0.getOperand(0);
21781   ArrayRef<int> Mask = SVN->getMask();
21782   unsigned NumElts = VT.getVectorNumElements();
21783   unsigned EltSizeInBits = VT.getScalarSizeInBits();
21784   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
21785   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
21786 
21787   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
21788     return SDValue();
21789   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
21790 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
21792   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
21793   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
21794   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
21795     for (unsigned i = 0; i != NumElts; ++i) {
21796       if (Mask[i] < 0)
21797         continue;
21798       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
21799         continue;
21800       return false;
21801     }
21802     return true;
21803   };
21804 
21805   // At the moment we just handle the case where we've truncated back to the
21806   // same size as before the extension.
  // TODO: handle more extension/truncation cases as they arise.
21808   if (EltSizeInBits != ExtSrcSizeInBits)
21809     return SDValue();
21810 
21811   // We can remove *extend_vector_inreg only if the truncation happens at
21812   // the same scale as the extension.
21813   if (isTruncate(ExtScale))
21814     return DAG.getBitcast(VT, N00);
21815 
21816   return SDValue();
21817 }
21818 
21819 // Combine shuffles of splat-shuffles of the form:
21820 // shuffle (shuffle V, undef, splat-mask), undef, M
21821 // If splat-mask contains undef elements, we need to be careful about
21822 // introducing undef's in the folded mask which are not the result of composing
21823 // the masks of the shuffles.
21824 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
21825                                         SelectionDAG &DAG) {
21826   if (!Shuf->getOperand(1).isUndef())
21827     return SDValue();
21828   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
21829   if (!Splat || !Splat->isSplat())
21830     return SDValue();
21831 
21832   ArrayRef<int> ShufMask = Shuf->getMask();
21833   ArrayRef<int> SplatMask = Splat->getMask();
21834   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
21835 
21836   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
21837   // every undef mask element in the splat-shuffle has a corresponding undef
21838   // element in the user-shuffle's mask or if the composition of mask elements
21839   // would result in undef.
21840   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
21841   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing to the users of the shuffle an undef element at index 1
  //   that was not there before the combine.
21845   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
21846   //   In this case the composition of masks yields SplatMask, so it's ok to
21847   //   simplify to the splat-shuffle.
21848   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
21849   //   In this case the composed mask includes all undef elements of SplatMask
21850   //   and in addition sets element zero to undef. It is safe to simplify to
21851   //   the splat-shuffle.
21852   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
21853                                        ArrayRef<int> SplatMask) {
21854     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
21855       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
21856           SplatMask[UserMask[i]] != -1)
21857         return false;
21858     return true;
21859   };
21860   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
21861     return Shuf->getOperand(0);
21862 
21863   // Create a new shuffle with a mask that is composed of the two shuffles'
21864   // masks.
21865   SmallVector<int, 32> NewMask;
21866   for (int Idx : ShufMask)
21867     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
21868 
21869   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
21870                               Splat->getOperand(0), Splat->getOperand(1),
21871                               NewMask);
21872 }
21873 
21874 /// Combine shuffle of shuffle of the form:
21875 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
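/// e.g. shuf (shuf X, undef, <3,u,3,u>), undef, <0,2,u,2>
///        --> shuf X, undef, <3,3,u,3> (a splat of element 3 of X),
/// subject to the combined mask being legal for the target.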
21876 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
21877                                      SelectionDAG &DAG) {
21878   if (!OuterShuf->getOperand(1).isUndef())
21879     return SDValue();
21880   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
21881   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
21882     return SDValue();
21883 
21884   ArrayRef<int> OuterMask = OuterShuf->getMask();
21885   ArrayRef<int> InnerMask = InnerShuf->getMask();
21886   unsigned NumElts = OuterMask.size();
21887   assert(NumElts == InnerMask.size() && "Mask length mismatch");
21888   SmallVector<int, 32> CombinedMask(NumElts, -1);
21889   int SplatIndex = -1;
21890   for (unsigned i = 0; i != NumElts; ++i) {
21891     // Undef lanes remain undef.
21892     int OuterMaskElt = OuterMask[i];
21893     if (OuterMaskElt == -1)
21894       continue;
21895 
21896     // Peek through the shuffle masks to get the underlying source element.
21897     int InnerMaskElt = InnerMask[OuterMaskElt];
21898     if (InnerMaskElt == -1)
21899       continue;
21900 
21901     // Initialize the splatted element.
21902     if (SplatIndex == -1)
21903       SplatIndex = InnerMaskElt;
21904 
21905     // Non-matching index - this is not a splat.
21906     if (SplatIndex != InnerMaskElt)
21907       return SDValue();
21908 
21909     CombinedMask[i] = InnerMaskElt;
21910   }
21911   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
21912           getSplatIndex(CombinedMask) != -1) &&
21913          "Expected a splat mask");
21914 
21915   // TODO: The transform may be a win even if the mask is not legal.
21916   EVT VT = OuterShuf->getValueType(0);
21917   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
21918   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
21919     return SDValue();
21920 
21921   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
21922                               InnerShuf->getOperand(1), CombinedMask);
21923 }
21924 
21925 /// If the shuffle mask is taking exactly one element from the first vector
21926 /// operand and passing through all other elements from the second vector
21927 /// operand, return the index of the mask element that is choosing an element
21928 /// from the first operand. Otherwise, return -1.
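/// e.g. for a mask of size 4, <4,1,6,7> passes lanes 0, 2 and 3 through from
/// the second operand and takes element 1 of the first operand into lane 1,
/// so this returns 1.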
21929 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
21930   int MaskSize = Mask.size();
21931   int EltFromOp0 = -1;
21932   // TODO: This does not match if there are undef elements in the shuffle mask.
21933   // Should we ignore undefs in the shuffle mask instead? The trade-off is
21934   // removing an instruction (a shuffle), but losing the knowledge that some
21935   // vector lanes are not needed.
21936   for (int i = 0; i != MaskSize; ++i) {
21937     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
21938       // We're looking for a shuffle of exactly one element from operand 0.
21939       if (EltFromOp0 != -1)
21940         return -1;
21941       EltFromOp0 = i;
21942     } else if (Mask[i] != i + MaskSize) {
21943       // Nothing from operand 1 can change lanes.
21944       return -1;
21945     }
21946   }
21947   return EltFromOp0;
21948 }
21949 
21950 /// If a shuffle inserts exactly one element from a source vector operand into
21951 /// another vector operand and we can access the specified element as a scalar,
21952 /// then we can eliminate the shuffle.
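/// e.g. shuffle (insertelt v1, x, 1), v2, <4,1,6,7> --> insertelt v2, x, 1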
21953 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
21954                                       SelectionDAG &DAG) {
21955   // First, check if we are taking one element of a vector and shuffling that
21956   // element into another vector.
21957   ArrayRef<int> Mask = Shuf->getMask();
21958   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
21959   SDValue Op0 = Shuf->getOperand(0);
21960   SDValue Op1 = Shuf->getOperand(1);
21961   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
21962   if (ShufOp0Index == -1) {
21963     // Commute mask and check again.
21964     ShuffleVectorSDNode::commuteMask(CommutedMask);
21965     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
21966     if (ShufOp0Index == -1)
21967       return SDValue();
21968     // Commute operands to match the commuted shuffle mask.
21969     std::swap(Op0, Op1);
21970     Mask = CommutedMask;
21971   }
21972 
21973   // The shuffle inserts exactly one element from operand 0 into operand 1.
21974   // Now see if we can access that element as a scalar via a real insert element
21975   // instruction.
21976   // TODO: We can try harder to locate the element as a scalar. Examples: it
21977   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
21978   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
21979          "Shuffle mask value must be from operand 0");
21980   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
21981     return SDValue();
21982 
21983   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
21984   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
21985     return SDValue();
21986 
21987   // There's an existing insertelement with constant insertion index, so we
21988   // don't need to check the legality/profitability of a replacement operation
21989   // that differs at most in the constant value. The target should be able to
21990   // lower any of those in a similar way. If not, legalization will expand this
21991   // to a scalar-to-vector plus shuffle.
21992   //
21993   // Note that the shuffle may move the scalar from the position that the insert
21994   // element used. Therefore, our new insert element occurs at the shuffle's
21995   // mask index value, not the insert's index value.
21996   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
21997   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
21998   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
21999                      Op1, Op0.getOperand(1), NewInsIndex);
22000 }
22001 
22002 /// If we have a unary shuffle of a shuffle, see if it can be folded away
22003 /// completely. This has the potential to lose undef knowledge because the first
22004 /// shuffle may not have an undef mask element where the second one does. So
22005 /// only call this after doing simplifications based on demanded elements.
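/// e.g. shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2> --> shuf0 X, Y, <0,0,2,2>,
/// since every lane of the outer mask picks a lane of shuf0 that holds the
/// same source element as the lane it replaces.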
22006 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
22007   // shuf (shuf0 X, Y, Mask0), undef, Mask
22008   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
22009   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
22010     return SDValue();
22011 
22012   ArrayRef<int> Mask = Shuf->getMask();
22013   ArrayRef<int> Mask0 = Shuf0->getMask();
22014   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
22015     // Ignore undef elements.
22016     if (Mask[i] == -1)
22017       continue;
22018     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
22019 
22020     // Is the element of the shuffle operand chosen by this shuffle the same as
22021     // the element chosen by the shuffle operand itself?
22022     if (Mask0[Mask[i]] != Mask0[i])
22023       return SDValue();
22024   }
22025   // Every element of this shuffle is identical to the result of the previous
22026   // shuffle, so we can replace this value.
22027   return Shuf->getOperand(0);
22028 }
22029 
22030 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
22031   EVT VT = N->getValueType(0);
22032   unsigned NumElts = VT.getVectorNumElements();
22033 
22034   SDValue N0 = N->getOperand(0);
22035   SDValue N1 = N->getOperand(1);
22036 
22037   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
22038 
22039   // Canonicalize shuffle undef, undef -> undef
22040   if (N0.isUndef() && N1.isUndef())
22041     return DAG.getUNDEF(VT);
22042 
22043   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
22044 
22045   // Canonicalize shuffle v, v -> v, undef
22046   if (N0 == N1)
22047     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
22048                                 createUnaryMask(SVN->getMask(), NumElts));
22049 
22050   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
22051   if (N0.isUndef())
22052     return DAG.getCommutedVectorShuffle(*SVN);
22053 
  // Remove references to the RHS if it is undef
22055   if (N1.isUndef()) {
22056     bool Changed = false;
22057     SmallVector<int, 8> NewMask;
22058     for (unsigned i = 0; i != NumElts; ++i) {
22059       int Idx = SVN->getMaskElt(i);
22060       if (Idx >= (int)NumElts) {
22061         Idx = -1;
22062         Changed = true;
22063       }
22064       NewMask.push_back(Idx);
22065     }
22066     if (Changed)
22067       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
22068   }
22069 
22070   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
22071     return InsElt;
22072 
22073   // A shuffle of a single vector that is a splatted value can always be folded.
22074   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
22075     return V;
22076 
22077   if (SDValue V = formSplatFromShuffles(SVN, DAG))
22078     return V;
22079 
22080   // If it is a splat, check if the argument vector is another splat or a
22081   // build_vector.
22082   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
22083     int SplatIndex = SVN->getSplatIndex();
22084     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
22085         TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
22086       // splat (vector_bo L, R), Index -->
22087       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
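      // e.g. for v4i32 and Index = 2, this becomes a zero-mask shuffle of
      //      scalar_to_vector (scalar_bo (extelt L, 2), (extelt R, 2)).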
22088       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
22089       SDLoc DL(N);
22090       EVT EltVT = VT.getScalarType();
22091       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
22092       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
22093       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
22094       SDValue NewBO =
22095           DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
22096       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
22097       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
22098       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
22099     }
22100 
22101     // If this is a bit convert that changes the element type of the vector but
22102     // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
22104     SDNode *V = N0.getNode();
22105     if (V->getOpcode() == ISD::BITCAST) {
22106       SDValue ConvInput = V->getOperand(0);
22107       if (ConvInput.getValueType().isVector() &&
22108           ConvInput.getValueType().getVectorNumElements() == NumElts)
22109         V = ConvInput.getNode();
22110     }
22111 
22112     if (V->getOpcode() == ISD::BUILD_VECTOR) {
22113       assert(V->getNumOperands() == NumElts &&
22114              "BUILD_VECTOR has wrong number of operands");
22115       SDValue Base;
22116       bool AllSame = true;
22117       for (unsigned i = 0; i != NumElts; ++i) {
22118         if (!V->getOperand(i).isUndef()) {
22119           Base = V->getOperand(i);
22120           break;
22121         }
22122       }
22123       // Splat of <u, u, u, u>, return <u, u, u, u>
22124       if (!Base.getNode())
22125         return N0;
22126       for (unsigned i = 0; i != NumElts; ++i) {
22127         if (V->getOperand(i) != Base) {
22128           AllSame = false;
22129           break;
22130         }
22131       }
22132       // Splat of <x, x, x, x>, return <x, x, x, x>
22133       if (AllSame)
22134         return N0;
22135 
22136       // Canonicalize any other splat as a build_vector.
22137       SDValue Splatted = V->getOperand(SplatIndex);
22138       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
22139       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
22140 
22141       // We may have jumped through bitcasts, so the type of the
22142       // BUILD_VECTOR may not match the type of the shuffle.
22143       if (V->getValueType(0) != VT)
22144         NewBV = DAG.getBitcast(VT, NewBV);
22145       return NewBV;
22146     }
22147   }
22148 
22149   // Simplify source operands based on shuffle mask.
22150   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22151     return SDValue(N, 0);
22152 
22153   // This is intentionally placed after demanded elements simplification because
22154   // it could eliminate knowledge of undef elements created by this shuffle.
22155   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
22156     return ShufOp;
22157 
22158   // Match shuffles that can be converted to any_vector_extend_in_reg.
22159   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
22160     return V;
22161 
22162   // Combine "truncate_vector_in_reg" style shuffles.
22163   if (SDValue V = combineTruncationShuffle(SVN, DAG))
22164     return V;
22165 
22166   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
22167       Level < AfterLegalizeVectorOps &&
22168       (N1.isUndef() ||
22169       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
22170        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
22171     if (SDValue V = partitionShuffleOfConcats(N, DAG))
22172       return V;
22173   }
22174 
22175   // A shuffle of a concat of the same narrow vector can be reduced to use
22176   // only low-half elements of a concat with undef:
22177   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
22178   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
22179       N0.getNumOperands() == 2 &&
22180       N0.getOperand(0) == N0.getOperand(1)) {
22181     int HalfNumElts = (int)NumElts / 2;
22182     SmallVector<int, 8> NewMask;
22183     for (unsigned i = 0; i != NumElts; ++i) {
22184       int Idx = SVN->getMaskElt(i);
22185       if (Idx >= HalfNumElts) {
22186         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
22187         Idx -= HalfNumElts;
22188       }
22189       NewMask.push_back(Idx);
22190     }
22191     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
22192       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
22193       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
22194                                    N0.getOperand(0), UndefVec);
22195       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
22196     }
22197   }
22198 
22199   // See if we can replace a shuffle with an insert_subvector.
22200   // e.g. v2i32 into v8i32:
22201   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
22202   // --> insert_subvector(lhs,rhs1,4).
22203   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
22204       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
22205     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
22206       // Ensure RHS subvectors are legal.
22207       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
22208       EVT SubVT = RHS.getOperand(0).getValueType();
22209       int NumSubVecs = RHS.getNumOperands();
22210       int NumSubElts = SubVT.getVectorNumElements();
22211       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
22212       if (!TLI.isTypeLegal(SubVT))
22213         return SDValue();
22214 
      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
22216       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
22217         return SDValue();
22218 
22219       // Search [NumSubElts] spans for RHS sequence.
22220       // TODO: Can we avoid nested loops to increase performance?
22221       SmallVector<int> InsertionMask(NumElts);
22222       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
22223         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
22224           // Reset mask to identity.
22225           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
22226 
22227           // Add subvector insertion.
22228           std::iota(InsertionMask.begin() + SubIdx,
22229                     InsertionMask.begin() + SubIdx + NumSubElts,
22230                     NumElts + (SubVec * NumSubElts));
22231 
22232           // See if the shuffle mask matches the reference insertion mask.
22233           bool MatchingShuffle = true;
22234           for (int i = 0; i != (int)NumElts; ++i) {
22235             int ExpectIdx = InsertionMask[i];
22236             int ActualIdx = Mask[i];
22237             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
22238               MatchingShuffle = false;
22239               break;
22240             }
22241           }
22242 
22243           if (MatchingShuffle)
22244             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
22245                                RHS.getOperand(SubVec),
22246                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
22247         }
22248       }
22249       return SDValue();
22250     };
22251     ArrayRef<int> Mask = SVN->getMask();
22252     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
22253       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
22254         return InsertN1;
22255     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
22256       SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
22257       ShuffleVectorSDNode::commuteMask(CommuteMask);
22258       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
22259         return InsertN0;
22260     }
22261   }
22262 
22263   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22264   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22265   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22266     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22267       return Res;
22268 
22269   // If this shuffle only has a single input that is a bitcasted shuffle,
22270   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22271   // back to their original types.
22272   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22273       N1.isUndef() && Level < AfterLegalizeVectorOps &&
22274       TLI.isTypeLegal(VT)) {
22275 
22276     SDValue BC0 = peekThroughOneUseBitcasts(N0);
22277     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22278       EVT SVT = VT.getScalarType();
22279       EVT InnerVT = BC0->getValueType(0);
22280       EVT InnerSVT = InnerVT.getScalarType();
22281 
22282       // Determine which shuffle works with the smaller scalar type.
22283       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22284       EVT ScaleSVT = ScaleVT.getScalarType();
22285 
22286       if (TLI.isTypeLegal(ScaleVT) &&
22287           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22288           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22289         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22290         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22291 
22292         // Scale the shuffle masks to the smaller scalar type.
22293         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22294         SmallVector<int, 8> InnerMask;
22295         SmallVector<int, 8> OuterMask;
22296         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22297         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22298 
22299         // Merge the shuffle masks.
22300         SmallVector<int, 8> NewMask;
22301         for (int M : OuterMask)
22302           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22303 
22304         // Test for shuffle mask legality over both commutations.
22305         SDValue SV0 = BC0->getOperand(0);
22306         SDValue SV1 = BC0->getOperand(1);
22307         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22308         if (!LegalMask) {
22309           std::swap(SV0, SV1);
22310           ShuffleVectorSDNode::commuteMask(NewMask);
22311           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22312         }
22313 
22314         if (LegalMask) {
22315           SV0 = DAG.getBitcast(ScaleVT, SV0);
22316           SV1 = DAG.getBitcast(ScaleVT, SV1);
22317           return DAG.getBitcast(
22318               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22319         }
22320       }
22321     }
22322   }
22323 
22324   // Compute the combined shuffle mask for a shuffle with SV0 as the first
22325   // operand, and SV1 as the second operand.
22326   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22327   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
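  // e.g. with NumElts = 4 and Commute = false:
  //   merging SVN = shuffle(OtherSVN = shuffle(A, B, <0,4,1,5>), C, <0,2,1,3>)
  //   yields SV0 = A, SV1 = B, Mask = <0,1,4,5> (C is never referenced).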
22328   auto MergeInnerShuffle =
22329       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
22330                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
22331                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
22332                      SmallVectorImpl<int> &Mask) -> bool {
22333     // Don't try to fold splats; they're likely to simplify somehow, or they
22334     // might be free.
22335     if (OtherSVN->isSplat())
22336       return false;
22337 
22338     SV0 = SV1 = SDValue();
22339     Mask.clear();
22340 
22341     for (unsigned i = 0; i != NumElts; ++i) {
22342       int Idx = SVN->getMaskElt(i);
22343       if (Idx < 0) {
22344         // Propagate Undef.
22345         Mask.push_back(Idx);
22346         continue;
22347       }
22348 
22349       if (Commute)
22350         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
22351 
22352       SDValue CurrentVec;
22353       if (Idx < (int)NumElts) {
22354         // This shuffle index refers to the inner shuffle N0. Lookup the inner
22355         // shuffle mask to identify which vector is actually referenced.
22356         Idx = OtherSVN->getMaskElt(Idx);
22357         if (Idx < 0) {
22358           // Propagate Undef.
22359           Mask.push_back(Idx);
22360           continue;
22361         }
22362         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
22363                                           : OtherSVN->getOperand(1);
22364       } else {
22365         // This shuffle index references an element within N1.
22366         CurrentVec = N1;
22367       }
22368 
22369       // Simple case where 'CurrentVec' is UNDEF.
22370       if (CurrentVec.isUndef()) {
22371         Mask.push_back(-1);
22372         continue;
22373       }
22374 
22375       // Canonicalize the shuffle index. We don't know yet if CurrentVec
22376       // will be the first or second operand of the combined shuffle.
22377       Idx = Idx % NumElts;
22378       if (!SV0.getNode() || SV0 == CurrentVec) {
22379         // Ok. CurrentVec is the left hand side.
22380         // Update the mask accordingly.
22381         SV0 = CurrentVec;
22382         Mask.push_back(Idx);
22383         continue;
22384       }
22385       if (!SV1.getNode() || SV1 == CurrentVec) {
22386         // Ok. CurrentVec is the right hand side.
22387         // Update the mask accordingly.
22388         SV1 = CurrentVec;
22389         Mask.push_back(Idx + NumElts);
22390         continue;
22391       }
22392 
22393       // Last chance - see if the vector is another shuffle and if it
22394       // uses one of the existing candidate shuffle ops.
22395       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
22396         int InnerIdx = CurrentSVN->getMaskElt(Idx);
22397         if (InnerIdx < 0) {
22398           Mask.push_back(-1);
22399           continue;
22400         }
22401         SDValue InnerVec = (InnerIdx < (int)NumElts)
22402                                ? CurrentSVN->getOperand(0)
22403                                : CurrentSVN->getOperand(1);
22404         if (InnerVec.isUndef()) {
22405           Mask.push_back(-1);
22406           continue;
22407         }
22408         InnerIdx %= NumElts;
22409         if (InnerVec == SV0) {
22410           Mask.push_back(InnerIdx);
22411           continue;
22412         }
22413         if (InnerVec == SV1) {
22414           Mask.push_back(InnerIdx + NumElts);
22415           continue;
22416         }
22417       }
22418 
22419       // Bail out if we cannot convert the shuffle pair into a single shuffle.
22420       return false;
22421     }
22422 
22423     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22424       return true;
22425 
22426     // Avoid introducing shuffles with illegal mask.
22427     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22428     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22429     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22430     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
22431     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
22432     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
22433     if (TLI.isShuffleMaskLegal(Mask, VT))
22434       return true;
22435 
22436     std::swap(SV0, SV1);
22437     ShuffleVectorSDNode::commuteMask(Mask);
22438     return TLI.isShuffleMaskLegal(Mask, VT);
22439   };
22440 
22441   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
22442     // Canonicalize shuffles according to rules:
22443     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
22444     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
22445     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
22446     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22447         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
22448       // The incoming shuffle must be of the same type as the result of the
22449       // current shuffle.
22450       assert(N1->getOperand(0).getValueType() == VT &&
22451              "Shuffle types don't match");
22452 
22453       SDValue SV0 = N1->getOperand(0);
22454       SDValue SV1 = N1->getOperand(1);
22455       bool HasSameOp0 = N0 == SV0;
22456       bool IsSV1Undef = SV1.isUndef();
22457       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
22458         // Commute the operands of this shuffle so merging below will trigger.
22459         return DAG.getCommutedVectorShuffle(*SVN);
22460     }
22461 
22462     // Canonicalize splat shuffles to the RHS to improve merging below.
22463     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
22464     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
22465         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
22466         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
22467         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
22468       return DAG.getCommutedVectorShuffle(*SVN);
22469     }
22470 
22471     // Try to fold according to rules:
22472     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
22473     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
22474     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
22475     // Don't try to fold shuffles with illegal type.
22476     // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
22478     for (int i = 0; i != 2; ++i) {
22479       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
22480           N->isOnlyUserOf(N->getOperand(i).getNode())) {
22481         // The incoming shuffle must be of the same type as the result of the
22482         // current shuffle.
22483         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
22484         assert(OtherSV->getOperand(0).getValueType() == VT &&
22485                "Shuffle types don't match");
22486 
22487         SDValue SV0, SV1;
22488         SmallVector<int, 4> Mask;
22489         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
22490                               SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
22492           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
22493             return DAG.getUNDEF(VT);
22494 
22495           return DAG.getVectorShuffle(VT, SDLoc(N),
22496                                       SV0 ? SV0 : DAG.getUNDEF(VT),
22497                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
22498         }
22499       }
22500     }
22501 
    // Merge shuffles through binops if we are able to merge at least one of
    // the inner shuffles.
22504     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
22505     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
22506     unsigned SrcOpcode = N0.getOpcode();
22507     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
22508         (N1.isUndef() ||
22509          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
22510       // Get binop source ops, or just pass on the undef.
22511       SDValue Op00 = N0.getOperand(0);
22512       SDValue Op01 = N0.getOperand(1);
22513       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
22514       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
22515       // TODO: We might be able to relax the VT check but we don't currently
22516       // have any isBinOp() that has different result/ops VTs so play safe until
22517       // we have test coverage.
22518       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
22519           Op01.getValueType() == VT && Op11.getValueType() == VT &&
22520           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
22521            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
22522            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
22523            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
22524         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
22525                                         SmallVectorImpl<int> &Mask, bool LeftOp,
22526                                         bool Commute) {
22527           SDValue InnerN = Commute ? N1 : N0;
22528           SDValue Op0 = LeftOp ? Op00 : Op01;
22529           SDValue Op1 = LeftOp ? Op10 : Op11;
22530           if (Commute)
22531             std::swap(Op0, Op1);
22532           // Only accept the merged shuffle if we don't introduce undef elements,
22533           // or the inner shuffle already contained undef elements.
22534           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
22535           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
22536                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
22537                                    Mask) &&
22538                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
22539                   llvm::none_of(Mask, [](int M) { return M < 0; }));
22540         };
22541 
22542         // Ensure we don't increase the number of shuffles - we must merge a
22543         // shuffle from at least one of the LHS and RHS ops.
22544         bool MergedLeft = false;
22545         SDValue LeftSV0, LeftSV1;
22546         SmallVector<int, 4> LeftMask;
22547         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22548             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22549           MergedLeft = true;
22550         } else {
22551           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22552           LeftSV0 = Op00, LeftSV1 = Op10;
22553         }
22554 
22555         bool MergedRight = false;
22556         SDValue RightSV0, RightSV1;
22557         SmallVector<int, 4> RightMask;
22558         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22559             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22560           MergedRight = true;
22561         } else {
22562           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22563           RightSV0 = Op01, RightSV1 = Op11;
22564         }
22565 
22566         if (MergedLeft || MergedRight) {
22567           SDLoc DL(N);
22568           SDValue LHS = DAG.getVectorShuffle(
22569               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22570               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22571           SDValue RHS = DAG.getVectorShuffle(
22572               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22573               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22574           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22575         }
22576       }
22577     }
22578   }
22579 
22580   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22581     return V;
22582 
22583   return SDValue();
22584 }
22585 
22586 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22587   SDValue InVal = N->getOperand(0);
22588   EVT VT = N->getValueType(0);
22589 
22590   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22591   // with a VECTOR_SHUFFLE and possible truncate.
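  // e.g. (v4i32 scalar_to_vector (extract_vector_elt v4i32:V, 2))
  //        --> (v4i32 vector_shuffle V, undef, <2,-1,-1,-1>), when legal.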
22592   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22593       VT.isFixedLengthVector() &&
22594       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22595     SDValue InVec = InVal->getOperand(0);
22596     SDValue EltNo = InVal->getOperand(1);
22597     auto InVecT = InVec.getValueType();
22598     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22599       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22600       int Elt = C0->getZExtValue();
22601       NewMask[0] = Elt;
      // If we have an implicit truncate, do the truncate here as long as it's
      // legal; if it's not, fall through and try to form a legal shuffle below.
22604       if (VT.getScalarType() != InVal.getValueType() &&
22605           InVal.getValueType().isScalarInteger() &&
22606           isTypeLegal(VT.getScalarType())) {
22607         SDValue Val =
22608             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22609         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22610       }
22611       if (VT.getScalarType() == InVecT.getScalarType() &&
22612           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22613         SDValue LegalShuffle =
22614           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22615                                       DAG.getUNDEF(InVecT), NewMask, DAG);
22616         if (LegalShuffle) {
          // If the initial vector is the correct size, this shuffle is a
          // valid result.
22619           if (VT == InVecT)
22620             return LegalShuffle;
          // If not, we must truncate the vector.
22622           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22623             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22624             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22625                                          InVecT.getVectorElementType(),
22626                                          VT.getVectorNumElements());
22627             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22628                                LegalShuffle, ZeroIdx);
22629           }
22630         }
22631       }
22632     }
22633   }
22634 
22635   return SDValue();
22636 }
22637 
22638 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22639   EVT VT = N->getValueType(0);
22640   SDValue N0 = N->getOperand(0);
22641   SDValue N1 = N->getOperand(1);
22642   SDValue N2 = N->getOperand(2);
22643   uint64_t InsIdx = N->getConstantOperandVal(2);
22644 
22645   // If inserting an UNDEF, just return the original vector.
22646   if (N1.isUndef())
22647     return N0;
22648 
22649   // If this is an insert of an extracted vector into an undef vector, we can
22650   // just use the input to the extract.
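  // i.e. INSERT_SUBVECTOR UNDEF (EXTRACT_SUBVECTOR X, N2) N2 --> X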
22651   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22652       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
22653     return N1.getOperand(0);
22654 
  // If we are inserting a bitcast of an extract from index N2 into an undef
  // vector, with matching element count and total size, just use a bitcast of
  // the extract's source.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X, N2)) N2
  //        --> BITCAST X
22659   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
22660       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
22661       N1.getOperand(0).getOperand(1) == N2 &&
22662       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
22663           VT.getVectorElementCount() &&
22664       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
22665           VT.getSizeInBits()) {
22666     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
22667   }
22668 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
22671   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
22672   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
22673   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
22674     SDValue CN0 = N0.getOperand(0);
22675     SDValue CN1 = N1.getOperand(0);
22676     EVT CN0VT = CN0.getValueType();
22677     EVT CN1VT = CN1.getValueType();
22678     if (CN0VT.isVector() && CN1VT.isVector() &&
22679         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
22680         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
22681       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
22682                                       CN0.getValueType(), CN0, CN1, N2);
22683       return DAG.getBitcast(VT, NewINSERT);
22684     }
22685   }
22686 
22687   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
22688   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
22689   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
22690   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22691       N0.getOperand(1).getValueType() == N1.getValueType() &&
22692       N0.getOperand(2) == N2)
22693     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
22694                        N1, N2);
22695 
22696   // Eliminate an intermediate insert into an undef vector:
22697   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
22698   // insert_subvector undef, X, N2
22699   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
22700       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
22701     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
22702                        N1.getOperand(1), N2);
22703 
22704   // Push subvector bitcasts to the output, adjusting the index as we go.
22705   // insert_subvector(bitcast(v), bitcast(s), c1)
22706   // -> bitcast(insert_subvector(v, s, c2))
22707   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
22708       N1.getOpcode() == ISD::BITCAST) {
22709     SDValue N0Src = peekThroughBitcasts(N0);
22710     SDValue N1Src = peekThroughBitcasts(N1);
22711     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
22712     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
22713     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
22714         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
22715       EVT NewVT;
22716       SDLoc DL(N);
22717       SDValue NewIdx;
22718       LLVMContext &Ctx = *DAG.getContext();
22719       ElementCount NumElts = VT.getVectorElementCount();
22720       unsigned EltSizeInBits = VT.getScalarSizeInBits();
22721       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
22722         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
22723         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
22724         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
22725       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
22726         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
22727         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
22728           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
22729                                    NumElts.divideCoefficientBy(Scale));
22730           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
22731         }
22732       }
22733       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
22734         SDValue Res = DAG.getBitcast(NewVT, N0Src);
22735         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
22736         return DAG.getBitcast(VT, Res);
22737       }
22738     }
22739   }
22740 
  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1), with Idx1 < Idx0
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
22745   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
22746       N1.getValueType() == N0.getOperand(1).getValueType()) {
22747     unsigned OtherIdx = N0.getConstantOperandVal(2);
22748     if (InsIdx < OtherIdx) {
22749       // Swap nodes.
22750       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
22751                                   N0.getOperand(0), N1, N2);
22752       AddToWorklist(NewOp.getNode());
22753       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
22754                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
22755     }
22756   }
22757 
22758   // If the input vector is a concatenation, and the insert replaces
22759   // one of the pieces, we can optimize into a single concat_vectors.
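  // e.g. with Factor = N1's element count:
  //   insert_subvector (concat_vectors A, B, C, D), X, 2*Factor
  //     --> concat_vectors A, B, X, D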
22760   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
22761       N0.getOperand(0).getValueType() == N1.getValueType() &&
22762       N0.getOperand(0).getValueType().isScalableVector() ==
22763           N1.getValueType().isScalableVector()) {
22764     unsigned Factor = N1.getValueType().getVectorMinNumElements();
22765     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
22766     Ops[InsIdx / Factor] = N1;
22767     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
22768   }
22769 
22770   // Simplify source operands based on insertion.
22771   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
22772     return SDValue(N, 0);
22773 
22774   return SDValue();
22775 }
22776 
22777 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
22778   SDValue N0 = N->getOperand(0);
22779 
22780   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
22781   if (N0->getOpcode() == ISD::FP16_TO_FP)
22782     return N0->getOperand(0);
22783 
22784   return SDValue();
22785 }
22786 
22787 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
22788   SDValue N0 = N->getOperand(0);
22789 
22790   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
22791   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
22792     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
22793     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
22794       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
22795                          N0.getOperand(0));
22796     }
22797   }
22798 
22799   return SDValue();
22800 }
22801 
22802 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
22803   SDValue N0 = N->getOperand(0);
22804   EVT VT = N0.getValueType();
22805   unsigned Opcode = N->getOpcode();
22806 
22807   // VECREDUCE over 1-element vector is just an extract.
22808   if (VT.getVectorElementCount().isScalar()) {
22809     SDLoc dl(N);
22810     SDValue Res =
22811         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
22812                     DAG.getVectorIdxConstant(0, dl));
22813     if (Res.getValueType() != N->getValueType(0))
22814       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
22815     return Res;
22816   }
22817 
  // On a boolean vector an and/or reduction is the same as a umin/umax
22819   // reduction. Convert them if the latter is legal while the former isn't.
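  // (Each boolean lane here is all-zeros or all-ones, so lane-wise
  // e.g. and(-1, 0) == 0 == umin(-1, 0).)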
22820   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
22821     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
22822         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
22823     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
22824         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
22825         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
22826       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
22827   }
22828 
22829   // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
22830   // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
22831   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
22832       TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
22833     SDValue Vec = N0.getOperand(0);
22834     SDValue Subvec = N0.getOperand(1);
    if ((Opcode == ISD::VECREDUCE_OR &&
         (Vec.isUndef() || isNullOrNullSplat(Vec))) ||
        (Opcode == ISD::VECREDUCE_AND &&
         (Vec.isUndef() || isAllOnesOrAllOnesSplat(Vec))))
22839       return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
22840   }
22841 
22842   return SDValue();
22843 }
22844 
22845 SDValue DAGCombiner::visitVPOp(SDNode *N) {
22846   // VP operations in which all vector elements are disabled - either by
22847   // determining that the mask is all false or that the EVL is 0 - can be
22848   // eliminated.
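  // e.g. (vp_add X, Y, Mask, /*EVL*/ 0) and (vp_add X, Y, AllZeroMask, EVL)
  // both have no active lanes and fold to undef below.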
22849   bool AreAllEltsDisabled = false;
22850   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
22851     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
22852   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
22853     AreAllEltsDisabled |=
22854         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
22855 
22856   // This is the only generic VP combine we support for now.
22857   if (!AreAllEltsDisabled)
22858     return SDValue();
22859 
22860   // Binary operations can be replaced by UNDEF.
22861   if (ISD::isVPBinaryOp(N->getOpcode()))
22862     return DAG.getUNDEF(N->getValueType(0));
22863 
22864   // VP Memory operations can be replaced by either the chain (stores) or the
22865   // chain + undef (loads).
22866   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
22867     if (MemSD->writeMem())
22868       return MemSD->getChain();
22869     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
22870   }
22871 
22872   // Reduction operations return the start operand when no elements are active.
22873   if (ISD::isVPReduction(N->getOpcode()))
22874     return N->getOperand(0);
22875 
22876   return SDValue();
22877 }
22878 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
22881 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
22882 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
22883 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
22884   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
22885 
22886   EVT VT = N->getValueType(0);
22887   SDValue LHS = N->getOperand(0);
22888   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
22889   SDLoc DL(N);
22890 
22891   // Make sure we're not running after operation legalization where it
22892   // may have custom lowered the vector shuffles.
22893   if (LegalOperations)
22894     return SDValue();
22895 
22896   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
22897     return SDValue();
22898 
22899   EVT RVT = RHS.getValueType();
22900   unsigned NumElts = RHS.getNumOperands();
22901 
  // Attempt to create a valid clear mask, splitting the mask into
  // sub-elements and checking to see if each is all zeros or all ones,
  // suitable for shuffle masking.
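  // e.g. (illustrative) a v2i64 AND mask <0x00000000FFFFFFFF,
  // 0xFFFFFFFF00000000> fails at Split == 1 (neither element is uniform),
  // but at Split == 2 every i32 sub-element is all-zeros or all-ones, so it
  // becomes a v4i32 shuffle selecting each lane from LHS or the zero vector.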
22905   auto BuildClearMask = [&](int Split) {
22906     int NumSubElts = NumElts * Split;
22907     int NumSubBits = RVT.getScalarSizeInBits() / Split;
22908 
22909     SmallVector<int, 8> Indices;
22910     for (int i = 0; i != NumSubElts; ++i) {
22911       int EltIdx = i / Split;
22912       int SubIdx = i % Split;
22913       SDValue Elt = RHS.getOperand(EltIdx);
22914       // X & undef --> 0 (not undef). So this lane must be converted to choose
22915       // from the zero constant vector (same as if the element had all 0-bits).
22916       if (Elt.isUndef()) {
22917         Indices.push_back(i + NumSubElts);
22918         continue;
22919       }
22920 
22921       APInt Bits;
22922       if (isa<ConstantSDNode>(Elt))
22923         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
22924       else if (isa<ConstantFPSDNode>(Elt))
22925         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
22926       else
22927         return SDValue();
22928 
22929       // Extract the sub element from the constant bit mask.
22930       if (DAG.getDataLayout().isBigEndian())
22931         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
22932       else
22933         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
22934 
22935       if (Bits.isAllOnes())
22936         Indices.push_back(i);
      else if (Bits.isZero())
22938         Indices.push_back(i + NumSubElts);
22939       else
22940         return SDValue();
22941     }
22942 
22943     // Let's see if the target supports this vector_shuffle.
22944     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
22945     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
22946     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
22947       return SDValue();
22948 
22949     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
22950     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
22951                                                    DAG.getBitcast(ClearVT, LHS),
22952                                                    Zero, Indices));
22953   };
22954 
22955   // Determine maximum split level (byte level masking).
22956   int MaxSplit = 1;
22957   if (RVT.getScalarSizeInBits() % 8 == 0)
22958     MaxSplit = RVT.getScalarSizeInBits() / 8;
22959 
22960   for (int Split = 1; Split <= MaxSplit; ++Split)
22961     if (RVT.getScalarSizeInBits() % Split == 0)
22962       if (SDValue S = BuildClearMask(Split))
22963         return S;
22964 
22965   return SDValue();
22966 }
22967 
22968 /// If a vector binop is performed on splat values, it may be profitable to
22969 /// extract, scalarize, and insert/splat.
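/// e.g. mul (splat X), (splat Y) --> splat (mul X, Y), when extracting the
/// lane and the scalar mul are both cheap for the target.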
22970 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
22971                                       const SDLoc &DL) {
22972   SDValue N0 = N->getOperand(0);
22973   SDValue N1 = N->getOperand(1);
22974   unsigned Opcode = N->getOpcode();
22975   EVT VT = N->getValueType(0);
22976   EVT EltVT = VT.getVectorElementType();
22977   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22978 
22979   // TODO: Remove/replace the extract cost check? If the elements are available
22980   //       as scalars, then there may be no extract cost. Should we ask if
22981   //       inserting a scalar back into a vector is cheap instead?
22982   int Index0, Index1;
22983   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
22984   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
22985   if (!Src0 || !Src1 || Index0 != Index1 ||
22986       Src0.getValueType().getVectorElementType() != EltVT ||
22987       Src1.getValueType().getVectorElementType() != EltVT ||
22988       !TLI.isExtractVecEltCheap(VT, Index0) ||
22989       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
22990     return SDValue();
22991 
22992   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
22993   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
22994   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
22995   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
22996 
22997   // If all lanes but 1 are undefined, no need to splat the scalar result.
22998   // TODO: Keep track of undefs and use that info in the general case.
22999   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
23000       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
23001       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
23002     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
23003     // build_vec ..undef, (bo X, Y), undef...
23004     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
23005     Ops[Index0] = ScalarBO;
23006     return DAG.getBuildVector(VT, DL, Ops);
23007   }
23008 
23009   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
23010   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
23011   return DAG.getBuildVector(VT, DL, Ops);
23012 }
23013 
23014 /// Visit a binary vector operation, like ADD.
23015 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
23016   EVT VT = N->getValueType(0);
23017   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
23018 
23019   SDValue LHS = N->getOperand(0);
23020   SDValue RHS = N->getOperand(1);
23021   unsigned Opcode = N->getOpcode();
23022   SDNodeFlags Flags = N->getFlags();
23023 
23024   // Move unary shuffles with identical masks after a vector binop:
23025   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
23026   //   --> shuffle (VBinOp A, B), Undef, Mask
23027   // This does not require type legality checks because we are creating the
23028   // same types of operations that are in the original sequence. We do have to
23029   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
23030   // though. This code is adapted from the identical transform in instcombine.
23031   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
23032       Opcode != ISD::UREM && Opcode != ISD::SREM &&
23033       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
23034     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
23035     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
23036     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
23037         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
23038         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
23039       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
23040                                      RHS.getOperand(0), Flags);
23041       SDValue UndefV = LHS.getOperand(1);
23042       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
23043     }
23044 
23045     // Try to sink a splat shuffle after a binop with a uniform constant.
23046     // This is limited to cases where neither the shuffle nor the constant have
23047     // undefined elements because that could be poison-unsafe or inhibit
23048     // demanded elements analysis. It is further limited to not change a splat
23049     // of an inserted scalar because that may be optimized better by
23050     // load-folding or other target-specific behaviors.
23051     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
23052         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
23053         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23054       // binop (splat X), (splat C) --> splat (binop X, C)
23055       SDValue X = Shuf0->getOperand(0);
23056       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
23057       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23058                                   Shuf0->getMask());
23059     }
23060     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
23061         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
23062         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23063       // binop (splat C), (splat X) --> splat (binop C, X)
23064       SDValue X = Shuf1->getOperand(0);
23065       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
23066       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23067                                   Shuf1->getMask());
23068     }
23069   }
23070 
23071   // The following pattern is likely to emerge with vector reduction ops. Moving
23072   // the binary operation ahead of insertion may allow using a narrower vector
23073   // instruction that has better performance than the wide version of the op:
23074   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
23075   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
23076       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
23077       LHS.getOperand(2) == RHS.getOperand(2) &&
23078       (LHS.hasOneUse() || RHS.hasOneUse())) {
23079     SDValue X = LHS.getOperand(1);
23080     SDValue Y = RHS.getOperand(1);
23081     SDValue Z = LHS.getOperand(2);
23082     EVT NarrowVT = X.getValueType();
23083     if (NarrowVT == Y.getValueType() &&
23084         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
23085                                               LegalOperations)) {
23086       // (binop undef, undef) may not return undef, so compute that result.
23087       SDValue VecC =
23088           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
23089       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
23090       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
23091     }
23092   }
23093 
23094   // Make sure all but the first op are undef or constant.
23095   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
23096     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
23097            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
23098              return Op.isUndef() ||
23099                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
23100            });
23101   };
23102 
23103   // The following pattern is likely to emerge with vector reduction ops. Moving
23104   // the binary operation ahead of the concat may allow using a narrower vector
23105   // instruction that has better performance than the wide version of the op:
23106   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
23107   //   concat (VBinOp X, Y), VecC
23108   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
23109       (LHS.hasOneUse() || RHS.hasOneUse())) {
23110     EVT NarrowVT = LHS.getOperand(0).getValueType();
23111     if (NarrowVT == RHS.getOperand(0).getValueType() &&
23112         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
23113       unsigned NumOperands = LHS.getNumOperands();
23114       SmallVector<SDValue, 4> ConcatOps;
23115       for (unsigned i = 0; i != NumOperands; ++i) {
        // This constant folds for operands 1 and up.
23117         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
23118                                         RHS.getOperand(i)));
23119       }
23120 
23121       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23122     }
23123   }
23124 
23125   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
23126     return V;
23127 
23128   return SDValue();
23129 }
23130 
23131 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
23132                                     SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");
23134 
23135   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
23136                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
23137 
23138   // If we got a simplified select_cc node back from SimplifySelectCC, then
23139   // break it down into a new SETCC node, and a new SELECT node, and then return
23140   // the SELECT node, since we were called with a SELECT node.
23141   if (SCC.getNode()) {
23142     // Check to see if we got a select_cc back (to turn into setcc/select).
23143     // Otherwise, just return whatever node we got back, like fabs.
23144     if (SCC.getOpcode() == ISD::SELECT_CC) {
23145       const SDNodeFlags Flags = N0->getFlags();
23146       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
23147                                   N0.getValueType(),
23148                                   SCC.getOperand(0), SCC.getOperand(1),
23149                                   SCC.getOperand(4), Flags);
23150       AddToWorklist(SETCC.getNode());
23151       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
23152                                          SCC.getOperand(2), SCC.getOperand(3));
23153       SelectNode->setFlags(Flags);
23154       return SelectNode;
23155     }
23156 
23157     return SCC;
23158   }
23159   return SDValue();
23160 }
23161 
23162 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
23163 /// being selected between, see if we can simplify the select.  Callers of this
23164 /// should assume that TheSelect is deleted if this returns true.  As such, they
23165 /// should return the appropriate thing (e.g. the node) back to the top-level of
23166 /// the DAG combiner loop to avoid it being looked at.
23167 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
23168                                     SDValue RHS) {
23169   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23170   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
23171   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
23172     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
23173       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
23174       SDValue Sqrt = RHS;
23175       ISD::CondCode CC;
23176       SDValue CmpLHS;
23177       const ConstantFPSDNode *Zero = nullptr;
23178 
23179       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
23180         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
23181         CmpLHS = TheSelect->getOperand(0);
23182         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
23183       } else {
23184         // SELECT or VSELECT
23185         SDValue Cmp = TheSelect->getOperand(0);
23186         if (Cmp.getOpcode() == ISD::SETCC) {
23187           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
23188           CmpLHS = Cmp.getOperand(0);
23189           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
23190         }
23191       }
23192       if (Zero && Zero->isZero() &&
23193           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
23194           CC == ISD::SETULT || CC == ISD::SETLT)) {
23195         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23196         CombineTo(TheSelect, Sqrt);
23197         return true;
23198       }
23199     }
23200   }
23201   // Cannot simplify select with vector condition
23202   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
23203 
23204   // If this is a select from two identical things, try to pull the operation
23205   // through the select.
23206   if (LHS.getOpcode() != RHS.getOpcode() ||
23207       !LHS.hasOneUse() || !RHS.hasOneUse())
23208     return false;
23209 
23210   // If this is a load and the token chain is identical, replace the select
23211   // of two loads with a load through a select of the address to load from.
23212   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
23213   // constants have been dropped into the constant pool.
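  // e.g. (illustrative): select C, (load CP0), (load CP1) -->
  //        load (select C, CP0, CP1)
  // where CP0/CP1 are the constant pool addresses of the two constants.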
23214   if (LHS.getOpcode() == ISD::LOAD) {
23215     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
23216     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
23217 
23218     // Token chains must be identical.
23219     if (LHS.getOperand(0) != RHS.getOperand(0) ||
23220         // Do not let this transformation reduce the number of volatile loads.
23221         // Be conservative for atomics for the moment
23222         // TODO: This does appear to be legal for unordered atomics (see D66309)
23223         !LLD->isSimple() || !RLD->isSimple() ||
23224         // FIXME: If either is a pre/post inc/dec load,
23225         // we'd need to split out the address adjustment.
23226         LLD->isIndexed() || RLD->isIndexed() ||
23227         // If this is an EXTLOAD, the VT's must match.
23228         LLD->getMemoryVT() != RLD->getMemoryVT() ||
23229         // If this is an EXTLOAD, the kind of extension must match.
23230         (LLD->getExtensionType() != RLD->getExtensionType() &&
23231          // The only exception is if one of the extensions is anyext.
23232          LLD->getExtensionType() != ISD::EXTLOAD &&
23233          RLD->getExtensionType() != ISD::EXTLOAD) ||
23234         // FIXME: this discards src value information.  This is
23235         // over-conservative. It would be beneficial to be able to remember
23236         // both potential memory locations.  Since we are discarding
23237         // src value info, don't do the transformation if the memory
23238         // locations are not in the default address space.
23239         LLD->getPointerInfo().getAddrSpace() != 0 ||
23240         RLD->getPointerInfo().getAddrSpace() != 0 ||
23241         // We can't produce a CMOV of a TargetFrameIndex since we won't
23242         // generate the address generation required.
23243         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23244         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23245         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
23246                                       LLD->getBasePtr().getValueType()))
23247       return false;
23248 
23249     // The loads must not depend on one another.
23250     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
23251       return false;
23252 
23253     // Check that the select condition doesn't reach either load.  If so,
23254     // folding this will induce a cycle into the DAG.  If not, this is safe to
23255     // xform, so create a select of the addresses.
23256 
23257     SmallPtrSet<const SDNode *, 32> Visited;
23258     SmallVector<const SDNode *, 16> Worklist;
23259 
    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all nodes in question so we need not search past it.
23262 
23263     Visited.insert(TheSelect);
23264     Worklist.push_back(LLD);
23265     Worklist.push_back(RLD);
23266 
23267     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23268         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23269       return false;
23270 
23271     SDValue Addr;
23272     if (TheSelect->getOpcode() == ISD::SELECT) {
23273       // We cannot do this optimization if any pair of {RLD, LLD} is a
23274       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23275       // Loads, we only need to check if CondNode is a successor to one of the
23276       // loads. We can further avoid this if there's no use of their chain
23277       // value.
23278       SDNode *CondNode = TheSelect->getOperand(0).getNode();
23279       Worklist.push_back(CondNode);
23280 
23281       if ((LLD->hasAnyUseOfValue(1) &&
23282            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23283           (RLD->hasAnyUseOfValue(1) &&
23284            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23285         return false;
23286 
23287       Addr = DAG.getSelect(SDLoc(TheSelect),
23288                            LLD->getBasePtr().getValueType(),
23289                            TheSelect->getOperand(0), LLD->getBasePtr(),
23290                            RLD->getBasePtr());
23291     } else {  // Otherwise SELECT_CC
23292       // We cannot do this optimization if any pair of {RLD, LLD} is a
23293       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23294       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23295       // one of the loads. We can further avoid this if there's no use of their
23296       // chain value.
23297 
23298       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23299       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23300       Worklist.push_back(CondLHS);
23301       Worklist.push_back(CondRHS);
23302 
23303       if ((LLD->hasAnyUseOfValue(1) &&
23304            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23305           (RLD->hasAnyUseOfValue(1) &&
23306            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23307         return false;
23308 
23309       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23310                          LLD->getBasePtr().getValueType(),
23311                          TheSelect->getOperand(0),
23312                          TheSelect->getOperand(1),
23313                          LLD->getBasePtr(), RLD->getBasePtr(),
23314                          TheSelect->getOperand(4));
23315     }
23316 
23317     SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must use the minimum (most restrictive) alignment of
    // the inputs.
23321     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
23322     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23323     if (!RLD->isInvariant())
23324       MMOFlags &= ~MachineMemOperand::MOInvariant;
23325     if (!RLD->isDereferenceable())
23326       MMOFlags &= ~MachineMemOperand::MODereferenceable;
23327     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23328       // FIXME: Discards pointer and AA info.
23329       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23330                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23331                          MMOFlags);
23332     } else {
23333       // FIXME: Discards pointer and AA info.
23334       Load = DAG.getExtLoad(
23335           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23336                                                   : LLD->getExtensionType(),
23337           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23338           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23339     }
23340 
23341     // Users of the select now use the result of the load.
23342     CombineTo(TheSelect, Load);
23343 
23344     // Users of the old loads now use the new load's chain.  We know the
23345     // old-load value is dead now.
23346     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23347     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23348     return true;
23349   }
23350 
23351   return false;
23352 }
23353 
23354 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23355 /// bitwise 'and'.
23356 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23357                                             SDValue N1, SDValue N2, SDValue N3,
23358                                             ISD::CondCode CC) {
23359   // If this is a select where the false operand is zero and the compare is a
23360   // check of the sign bit, see if we can perform the "gzip trick":
23361   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23362   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
23363   EVT XType = N0.getValueType();
23364   EVT AType = N2.getValueType();
23365   if (!isNullConstant(N3) || !XType.bitsGE(AType))
23366     return SDValue();
23367 
23368   // If the comparison is testing for a positive value, we have to invert
23369   // the sign bit mask, so only do that transform if the target has a bitwise
23370   // 'and not' instruction (the invert is free).
23371   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23372     // (X > -1) ? A : 0
23373     // (X >  0) ? X : 0 <-- This is canonical signed max.
23374     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23375       return SDValue();
23376   } else if (CC == ISD::SETLT) {
23377     // (X <  0) ? A : 0
23378     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
23379     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23380       return SDValue();
23381   } else {
23382     return SDValue();
23383   }
23384 
23385   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23386   // constant.
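  // e.g. (illustrative) XType == i32, A == 4: instead of (and (sra X, 31), 4)
  // we can emit (and (srl X, 29), 4) - the logical shift drops the sign bit
  // directly into bit 2, avoiding the all-ones splat.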
23387   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23388   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23389   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23390     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23391     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23392       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23393       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23394       AddToWorklist(Shift.getNode());
23395 
23396       if (XType.bitsGT(AType)) {
23397         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23398         AddToWorklist(Shift.getNode());
23399       }
23400 
23401       if (CC == ISD::SETGT)
23402         Shift = DAG.getNOT(DL, Shift, AType);
23403 
23404       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23405     }
23406   }
23407 
23408   unsigned ShCt = XType.getSizeInBits() - 1;
23409   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23410     return SDValue();
23411 
23412   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23413   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23414   AddToWorklist(Shift.getNode());
23415 
23416   if (XType.bitsGT(AType)) {
23417     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23418     AddToWorklist(Shift.getNode());
23419   }
23420 
23421   if (CC == ISD::SETGT)
23422     Shift = DAG.getNOT(DL, Shift, AType);
23423 
23424   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23425 }
23426 
23427 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23428 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23429   SDValue N0 = N->getOperand(0);
23430   SDValue N1 = N->getOperand(1);
23431   SDValue N2 = N->getOperand(2);
23432   EVT VT = N->getValueType(0);
23433   SDLoc DL(N);
23434 
23435   unsigned BinOpc = N1.getOpcode();
23436   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23437     return SDValue();
23438 
23439   // The use checks are intentionally on SDNode because we may be dealing
23440   // with opcodes that produce more than one SDValue.
23441   // TODO: Do we really need to check N0 (the condition operand of the select)?
23442   //       But removing that clause could cause an infinite loop...
23443   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23444     return SDValue();
23445 
23446   // Binops may include opcodes that return multiple values, so all values
23447   // must be created/propagated from the newly created binops below.
23448   SDVTList OpVTs = N1->getVTList();
23449 
23450   // Fold select(cond, binop(x, y), binop(z, y))
23451   //  --> binop(select(cond, x, z), y)
23452   if (N1.getOperand(1) == N2.getOperand(1)) {
23453     SDValue NewSel =
23454         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23455     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23456     NewBinOp->setFlags(N1->getFlags());
23457     NewBinOp->intersectFlagsWith(N2->getFlags());
23458     return NewBinOp;
23459   }
23460 
23461   // Fold select(cond, binop(x, y), binop(x, z))
23462   //  --> binop(x, select(cond, y, z))
23463   // Second op VT might be different (e.g. shift amount type)
23464   if (N1.getOperand(0) == N2.getOperand(0) &&
23465       VT == N1.getOperand(1).getValueType() &&
23466       VT == N2.getOperand(1).getValueType()) {
23467     SDValue NewSel =
23468         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23469     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23470     NewBinOp->setFlags(N1->getFlags());
23471     NewBinOp->intersectFlagsWith(N2->getFlags());
23472     return NewBinOp;
23473   }
23474 
23475   // TODO: Handle isCommutativeBinOp patterns as well?
23476   return SDValue();
23477 }
23478 
23479 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23480 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23481   SDValue N0 = N->getOperand(0);
23482   EVT VT = N->getValueType(0);
23483   bool IsFabs = N->getOpcode() == ISD::FABS;
23484   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23485 
23486   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23487     return SDValue();
23488 
23489   SDValue Int = N0.getOperand(0);
23490   EVT IntVT = Int.getValueType();
23491 
23492   // The operand to cast should be integer.
23493   if (!IntVT.isInteger() || IntVT.isVector())
23494     return SDValue();
23495 
23496   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23497   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
23498   APInt SignMask;
23499   if (N0.getValueType().isVector()) {
23500     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23501     // 0x7f...) per element and splat it.
23502     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23503     if (IsFabs)
23504       SignMask = ~SignMask;
23505     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23506   } else {
23507     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23508     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23509     if (IsFabs)
23510       SignMask = ~SignMask;
23511   }
23512   SDLoc DL(N0);
23513   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23514                     DAG.getConstant(SignMask, DL, IntVT));
23515   AddToWorklist(Int.getNode());
23516   return DAG.getBitcast(VT, Int);
23517 }
23518 
23519 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
23520 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23521 /// in it. This may be a win when the constant is not otherwise available
23522 /// because it replaces two constant pool loads with one.
23523 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23524     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23525     ISD::CondCode CC) {
23526   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23527     return SDValue();
23528 
23529   // If we are before legalize types, we want the other legalization to happen
23530   // first (for example, to avoid messing with soft float).
23531   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23532   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23533   EVT VT = N2.getValueType();
23534   if (!TV || !FV || !TLI.isTypeLegal(VT))
23535     return SDValue();
23536 
23537   // If a constant can be materialized without loads, this does not make sense.
23538   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23539       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23540       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23541     return SDValue();
23542 
23543   // If both constants have multiple uses, then we won't need to do an extra
23544   // load. The values are likely around in registers for other users.
23545   if (!TV->hasOneUse() && !FV->hasOneUse())
23546     return SDValue();
23547 
23548   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23549                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23550   Type *FPTy = Elts[0]->getType();
23551   const DataLayout &TD = DAG.getDataLayout();
23552 
23553   // Create a ConstantArray of the two constants.
23554   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23555   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23556                                       TD.getPrefTypeAlign(FPTy));
23557   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23558 
23559   // Get offsets to the 0 and 1 elements of the array, so we can select between
23560   // them.
23561   SDValue Zero = DAG.getIntPtrConstant(0, DL);
23562   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23563   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23564   SDValue Cond =
23565       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23566   AddToWorklist(Cond.getNode());
23567   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23568   AddToWorklist(CstOffset.getNode());
23569   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23570   AddToWorklist(CPIdx.getNode());
23571   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23572                      MachinePointerInfo::getConstantPool(
23573                          DAG.getMachineFunction()), Alignment);
23574 }
23575 
23576 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23577 /// where 'cond' is the comparison specified by CC.
23578 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23579                                       SDValue N2, SDValue N3, ISD::CondCode CC,
23580                                       bool NotExtCompare) {
23581   // (x ? y : y) -> y.
23582   if (N2 == N3) return N2;
23583 
23584   EVT CmpOpVT = N0.getValueType();
23585   EVT CmpResVT = getSetCCResultType(CmpOpVT);
23586   EVT VT = N2.getValueType();
23587   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23588   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23589   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23590 
23591   // Determine if the condition we're dealing with is constant.
23592   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23593     AddToWorklist(SCC.getNode());
23594     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23595       // fold select_cc true, x, y -> x
23596       // fold select_cc false, x, y -> y
23597       return !(SCCC->isZero()) ? N2 : N3;
23598     }
23599   }
23600 
23601   if (SDValue V =
23602           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23603     return V;
23604 
23605   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23606     return V;
23607 
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // In plain terms: we can turn the SELECT_CC into an AND when the condition
  // can be materialized as an all-ones register. Any single bit-test can be
  // materialized as an all-ones register with shift-left and
  // shift-right-arith.
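  // e.g. (illustrative) i8 x, y == 0x10: shl x, 3 moves bit 4 into the sign
  // bit, and sra by 7 then splats it to 0x00 or 0xFF, ready to AND with A.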
23614   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23615       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23616     SDValue AndLHS = N0->getOperand(0);
23617     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23618     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23619       // Shift the tested bit over the sign bit.
23620       const APInt &AndMask = ConstAndRHS->getAPIntValue();
23621       unsigned ShCt = AndMask.getBitWidth() - 1;
23622       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23623         SDValue ShlAmt =
23624           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23625                           getShiftAmountTy(AndLHS.getValueType()));
23626         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23627 
23628         // Now arithmetic right shift it all the way over, so the result is
23629         // either all-ones, or zero.
23630         SDValue ShrAmt =
23631           DAG.getConstant(ShCt, SDLoc(Shl),
23632                           getShiftAmountTy(Shl.getValueType()));
23633         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
23634 
23635         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
23636       }
23637     }
23638   }
23639 
23640   // fold select C, 16, 0 -> shl C, 4
23641   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
23642   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
23643 
23644   if ((Fold || Swap) &&
23645       TLI.getBooleanContents(CmpOpVT) ==
23646           TargetLowering::ZeroOrOneBooleanContent &&
23647       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
23648 
23649     if (Swap) {
23650       CC = ISD::getSetCCInverse(CC, CmpOpVT);
23651       std::swap(N2C, N3C);
23652     }
23653 
23654     // If the caller doesn't want us to simplify this into a zext of a compare,
23655     // don't do it.
23656     if (NotExtCompare && N2C->isOne())
23657       return SDValue();
23658 
23659     SDValue Temp, SCC;
23660     // zext (setcc n0, n1)
23661     if (LegalTypes) {
23662       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
23663       if (VT.bitsLT(SCC.getValueType()))
23664         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
23665       else
23666         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23667     } else {
23668       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
23669       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
23670     }
23671 
23672     AddToWorklist(SCC.getNode());
23673     AddToWorklist(Temp.getNode());
23674 
23675     if (N2C->isOne())
23676       return Temp;
23677 
23678     unsigned ShCt = N2C->getAPIntValue().logBase2();
23679     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
23680       return SDValue();
23681 
23682     // shl setcc result by log2 n2c
23683     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
23684                        DAG.getConstant(ShCt, SDLoc(Temp),
23685                                        getShiftAmountTy(Temp.getValueType())));
23686   }
23687 
23688   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
23689   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
23690   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
23691   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
23692   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
23693   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
23694   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
23695   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
23696   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
23697     SDValue ValueOnZero = N2;
23698     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
23700     if (CC == ISD::SETNE)
23701       std::swap(ValueOnZero, Count);
23702     // Check if the value on zero is a constant equal to the bits in the type.
23703     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
23704       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
23705         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
23706         // legal, combine to just cttz.
23707         if ((Count.getOpcode() == ISD::CTTZ ||
23708              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
23709             N0 == Count.getOperand(0) &&
23710             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
23711           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
23712         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
23713         // legal, combine to just ctlz.
23714         if ((Count.getOpcode() == ISD::CTLZ ||
23715              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
23716             N0 == Count.getOperand(0) &&
23717             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
23718           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
23719       }
23720     }
23721   }
23722 
23723   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
23724   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
23725   if (!NotExtCompare && N1C && N2C && N3C &&
23726       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
23727       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
23728        (N1C->isZero() && CC == ISD::SETLT)) &&
23729       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
23730     SDValue ASR = DAG.getNode(
23731         ISD::SRA, DL, CmpOpVT, N0,
23732         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
23733     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
23734                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
23735   }
23736 
23737   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23738     return S;
23739   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
23740     return S;
23741 
23742   return SDValue();
23743 }
23744 
23745 /// This is a stub for TargetLowering::SimplifySetCC.
23746 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
23747                                    ISD::CondCode Cond, const SDLoc &DL,
23748                                    bool foldBooleans) {
23749   TargetLowering::DAGCombinerInfo
23750     DagCombineInfo(DAG, Level, false, this);
23751   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
23752 }
23753 
23754 /// Given an ISD::SDIV node expressing a divide by constant, return
23755 /// a DAG expression to select that will generate the same value by multiplying
23756 /// by a magic number.
23757 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23758 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
23761   if (DAG.getMachineFunction().getFunction().hasMinSize())
23762     return SDValue();
23763 
23764   SmallVector<SDNode *, 8> Built;
23765   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
23766     for (SDNode *N : Built)
23767       AddToWorklist(N);
23768     return S;
23769   }
23770 
23771   return SDValue();
23772 }
23773 
23774 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
23775 /// DAG expression that will generate the same value by right shifting.
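/// For example (illustrative): i32 x / 8 becomes
///   t = x + ((uint32_t)(x >> 31) >> 29); // bias negative dividends by 7
///   q = t >> 3;                          // then shift right arithmetically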
23776 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
23777   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
23778   if (!C)
23779     return SDValue();
23780 
23781   // Avoid division by zero.
23782   if (C->isZero())
23783     return SDValue();
23784 
23785   SmallVector<SDNode *, 8> Built;
23786   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
23787     for (SDNode *N : Built)
23788       AddToWorklist(N);
23789     return S;
23790   }
23791 
23792   return SDValue();
23793 }
23794 
23795 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
23796 /// expression that will generate the same value by multiplying by a magic
23797 /// number.
23798 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
23799 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
23802   if (DAG.getMachineFunction().getFunction().hasMinSize())
23803     return SDValue();
23804 
23805   SmallVector<SDNode *, 8> Built;
23806   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
23807     for (SDNode *N : Built)
23808       AddToWorklist(N);
23809     return S;
23810   }
23811 
23812   return SDValue();
23813 }
23814 
/// Determines the LogBase2 value for a non-zero input value using the
23816 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
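/// e.g. for i32 V == 16: ctlz(16) == 27, so LogBase2 == 31 - 27 == 4.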
23817 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
23818   EVT VT = V.getValueType();
23819   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
23820   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
23821   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
23822   return LogBase2;
23823 }
23824 
23825 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23826 /// For the reciprocal, we need to find the zero of the function:
23827 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
23828 ///     =>
23829 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
23830 ///     does not require additional intermediate precision]
23831 /// For the last iteration, put numerator N into it to gain more precision:
23832 ///   Result = N X_i + X_i (N - N A X_i)
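/// e.g. (illustrative) for A == 3 with initial estimate X_0 == 0.3:
///   X_1 = 0.3  * (2 - 3 * 0.3)  = 0.33
///   X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333
/// converging quadratically toward 1/3.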
23833 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
23834                                       SDNodeFlags Flags) {
23835   if (LegalDAG)
23836     return SDValue();
23837 
23838   // TODO: Handle extended types?
23839   EVT VT = Op.getValueType();
23840   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23841       VT.getScalarType() != MVT::f64)
23842     return SDValue();
23843 
23844   // If estimates are explicitly disabled for this function, we're done.
23845   MachineFunction &MF = DAG.getMachineFunction();
23846   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
23847   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23848     return SDValue();
23849 
23850   // Estimates may be explicitly enabled for this type with a custom number of
23851   // refinement steps.
23852   int Iterations = TLI.getDivRefinementSteps(VT, MF);
23853   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
23854     AddToWorklist(Est.getNode());
23855 
23856     SDLoc DL(Op);
23857     if (Iterations) {
23858       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
23859 
23860       // Newton iterations: Est = Est + Est (N - Arg * Est)
23861       // If this is the last iteration, also multiply by the numerator.
23862       for (int i = 0; i < Iterations; ++i) {
23863         SDValue MulEst = Est;
23864 
23865         if (i == Iterations - 1) {
23866           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
23867           AddToWorklist(MulEst.getNode());
23868         }
23869 
23870         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
23871         AddToWorklist(NewEst.getNode());
23872 
23873         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
23874                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
23875         AddToWorklist(NewEst.getNode());
23876 
23877         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23878         AddToWorklist(NewEst.getNode());
23879 
23880         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
23881         AddToWorklist(Est.getNode());
23882       }
23883     } else {
      // If no iterations are available, multiply by N.
23885       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
23886       AddToWorklist(Est.getNode());
23887     }
23888 
23889     return Est;
23890   }
23891 
23892   return SDValue();
23893 }
23894 
23895 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23896 /// For the reciprocal sqrt, we need to find the zero of the function:
23897 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23898 ///     =>
23899 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
23900 /// As a result, we precompute A/2 prior to the iteration loop.
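/// e.g. (illustrative) for A == 2 with initial estimate X_0 == 0.7:
///   X_1 = 0.7 * (1.5 - (2 * 0.7 * 0.7) / 2) = 0.7 * 1.01 = 0.707
/// already close to 1/sqrt(2) ~= 0.70711.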
23901 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
23902                                          unsigned Iterations,
23903                                          SDNodeFlags Flags, bool Reciprocal) {
23904   EVT VT = Arg.getValueType();
23905   SDLoc DL(Arg);
23906   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
23907 
23908   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
23909   // this entire sequence requires only one FP constant.
23910   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
23911   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
23912 
23913   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
23914   for (unsigned i = 0; i < Iterations; ++i) {
23915     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
23916     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
23917     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
23918     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
23919   }
23920 
23921   // If non-reciprocal square root is requested, multiply the result by Arg.
23922   if (!Reciprocal)
23923     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
23924 
23925   return Est;
23926 }
23927 
23928 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
23929 /// For the reciprocal sqrt, we need to find the zero of the function:
23930 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
23931 ///     =>
23932 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
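/// Algebraically this is the same update as the one-constant form above,
/// since (-0.5 * X) * (A*X*X - 3.0) == X * (1.5 - (A/2) * X*X); this variant
/// spends two FP constants (-0.5 and -3.0) instead of deriving A/2 from 1.5.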
23933 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
23934                                          unsigned Iterations,
23935                                          SDNodeFlags Flags, bool Reciprocal) {
23936   EVT VT = Arg.getValueType();
23937   SDLoc DL(Arg);
23938   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
23939   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
23940 
23941   // This routine must enter the loop below to work correctly
23942   // when (Reciprocal == false).
  assert(Iterations > 0 && "Expected at least one refinement iteration");
23944 
23945   // Newton iterations for reciprocal square root:
23946   // E = (E * -0.5) * ((A * E) * E + -3.0)
23947   for (unsigned i = 0; i < Iterations; ++i) {
23948     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
23949     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
23950     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
23951 
    // When calculating a non-reciprocal square root, the last iteration
    // instead builds:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice the common subexpression A * E)
23955     SDValue LHS;
23956     if (Reciprocal || (i + 1) < Iterations) {
23957       // RSQRT: LHS = (E * -0.5)
23958       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
23959     } else {
23960       // SQRT: LHS = (A * E) * -0.5
23961       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
23962     }
23963 
23964     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
23965   }
23966 
23967   return Est;
23968 }
23969 
23970 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
23971 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
23972 /// Op can be zero.
23973 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
23974                                            bool Reciprocal) {
23975   if (LegalDAG)
23976     return SDValue();
23977 
23978   // TODO: Handle extended types?
23979   EVT VT = Op.getValueType();
23980   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
23981       VT.getScalarType() != MVT::f64)
23982     return SDValue();
23983 
23984   // If estimates are explicitly disabled for this function, we're done.
23985   MachineFunction &MF = DAG.getMachineFunction();
23986   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
23987   if (Enabled == TLI.ReciprocalEstimate::Disabled)
23988     return SDValue();
23989 
23990   // Estimates may be explicitly enabled for this type with a custom number of
23991   // refinement steps.
23992   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
23993 
23994   bool UseOneConstNR = false;
23995   if (SDValue Est =
23996       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
23997                           Reciprocal)) {
23998     AddToWorklist(Est.getNode());
23999 
24000     if (Iterations)
24001       Est = UseOneConstNR
24002             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
24003             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
24004     if (!Reciprocal) {
24005       SDLoc DL(Op);
24006       // Try the target specific test first.
24007       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
24008 
      // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or the value provided by
      // the target for those cases.
24012       Est = DAG.getNode(
24013           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
24014           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
24015     }
24016     return Est;
24017   }
24018 
24019   return SDValue();
24020 }
24021 
24022 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24023   return buildSqrtEstimateImpl(Op, Flags, true);
24024 }
24025 
24026 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24027   return buildSqrtEstimateImpl(Op, Flags, false);
24028 }
24029 
24030 /// Return true if there is any possibility that the two addresses overlap.
24031 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
24032 
24033   struct MemUseCharacteristics {
24034     bool IsVolatile;
24035     bool IsAtomic;
24036     SDValue BasePtr;
24037     int64_t Offset;
24038     Optional<int64_t> NumBytes;
24039     MachineMemOperand *MMO;
24040   };
24041 
24042   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
24043     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
24044       int64_t Offset = 0;
24045       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
24046         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
24047                      ? C->getSExtValue()
24048                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
24049                            ? -1 * C->getSExtValue()
24050                            : 0;
24051       uint64_t Size =
24052           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
24053       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
24054               Offset /*base offset*/,
24055               Optional<int64_t>(Size),
24056               LSN->getMemOperand()};
24057     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
24059       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
24060               (LN->hasOffset()) ? LN->getOffset() : 0,
24061               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
24062                                 : Optional<int64_t>(),
24063               (MachineMemOperand *)nullptr};
24064     // Default.
24065     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
24066             (int64_t)0 /*offset*/,
24067             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
24068   };
24069 
24070   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
24071                         MUC1 = getCharacteristics(Op1);
24072 
24073   // If they are to the same address, then they must be aliases.
24074   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
24075       MUC0.Offset == MUC1.Offset)
24076     return true;
24077 
24078   // If they are both volatile then they cannot be reordered.
24079   if (MUC0.IsVolatile && MUC1.IsVolatile)
24080     return true;
24081 
  // Be conservative about atomics for the moment.
24083   // TODO: This is way overconservative for unordered atomics (see D66309)
24084   if (MUC0.IsAtomic && MUC1.IsAtomic)
24085     return true;
24086 
  if (MUC0.MMO && MUC1.MMO) {
    // If one operation reads from invariant memory and the other may store,
    // they cannot alias. This should really check the equivalent of mayWrite,
    // but that only matters for memory nodes other than load/store.
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }
24092 
24093   // Try to prove that there is aliasing, or that there is no aliasing. Either
24094   // way, we can return now. If nothing can be proved, proceed with more tests.
24095   bool IsAlias;
24096   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
24097                                        DAG, IsAlias))
24098     return IsAlias;
24099 
  // The following checks all rely on MMO0 and MMO1 being valid. Fail
  // conservatively if either is not known.
24102   if (!MUC0.MMO || !MUC1.MMO)
24103     return true;
24104 
  // If we know the two accesses have relatively large alignment compared to
  // their size and offset, we may be able to prove they do not alias. This
  // check is conservative for now to catch cases created by splitting vector
  // types: it only works when the offsets are multiples of the size of the
  // data.
24117   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
24118   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
24119   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
24120   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
24121   auto &Size0 = MUC0.NumBytes;
24122   auto &Size1 = MUC1.NumBytes;
24123   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
24124       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
24125       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
24126       SrcValOffset1 % *Size1 == 0) {
24127     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
24128     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
24129 
24130     // There is no overlap between these relatively aligned accesses of
24131     // similar size. Return no alias.
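    // For example, two 4-byte accesses with a common 16-byte base alignment
    // at offsets 4 and 8 give OffAlign0 = 4 and OffAlign1 = 8; since
    // 4 + 4 <= 8, the accesses cannot overlap.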
24132     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
24133       return false;
24134   }
24135 
24136   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
24137                    ? CombinerGlobalAA
24138                    : DAG.getSubtarget().useAA();
24139 #ifndef NDEBUG
24140   if (CombinerAAOnlyFunc.getNumOccurrences() &&
24141       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
24142     UseAA = false;
24143 #endif
24144 
24145   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
24146       Size0.hasValue() && Size1.hasValue()) {
24147     // Use alias analysis information.
24148     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
24149     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
24150     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
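    // The extents are measured from the common MinOffset so that each
    // MemoryLocation covers both accesses: e.g. 4-byte accesses at offsets 8
    // and 12 query AA with extents of 4 and 8 bytes respectively.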
24151     if (AA->isNoAlias(
24152             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
24153                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
24154             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
24155                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
24156       return false;
24157   }
24158 
24159   // Otherwise we have to assume they alias.
24160   return true;
24161 }
24162 
/// Walk up the chain, skipping non-aliasing memory nodes, looking for aliasing
/// nodes and adding them to the Aliases vector.
24165 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
24166                                    SmallVectorImpl<SDValue> &Aliases) {
24167   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
24168   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
24169 
24170   // Get alias information for node.
24171   // TODO: relax aliasing for unordered atomics (see D66309)
24172   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
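  // Two simple loads never need to be ordered with respect to each other, so
  // a load chain operand can be skipped below even when the addresses alias.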
24173 
24174   // Starting off.
24175   Chains.push_back(OriginalChain);
24176   unsigned Depth = 0;
24177 
  // Attempt to improve the chain by a single step.
24179   auto ImproveChain = [&](SDValue &C) -> bool {
24180     switch (C.getOpcode()) {
24181     case ISD::EntryToken:
24182       // No need to mark EntryToken.
24183       C = SDValue();
24184       return true;
24185     case ISD::LOAD:
24186     case ISD::STORE: {
24187       // Get alias information for C.
24188       // TODO: Relax aliasing for unordered atomics (see D66309)
24189       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
24190                       cast<LSBaseSDNode>(C.getNode())->isSimple();
24191       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
24192         // Look further up the chain.
24193         C = C.getOperand(0);
24194         return true;
24195       }
24196       // Alias, so stop here.
24197       return false;
24198     }
24199 
24200     case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
24202       C = C.getOperand(0);
24203       return true;
24204 
24205     case ISD::LIFETIME_START:
24206     case ISD::LIFETIME_END: {
24207       // We can forward past any lifetime start/end that can be proven not to
24208       // alias the memory access.
24209       if (!mayAlias(N, C.getNode())) {
24210         // Look further up the chain.
24211         C = C.getOperand(0);
24212         return true;
24213       }
24214       return false;
24215     }
24216     default:
24217       return false;
24218     }
24219   };
24220 
24221   // Look at each chain and determine if it is an alias.  If so, add it to the
24222   // aliases list.  If not, then continue up the chain looking for the next
24223   // candidate.
24224   while (!Chains.empty()) {
24225     SDValue Chain = Chains.pop_back_val();
24226 
24227     // Don't bother if we've seen Chain before.
24228     if (!Visited.insert(Chain.getNode()).second)
24229       continue;
24230 
24231     // For TokenFactor nodes, look at each operand and only continue up the
24232     // chain until we reach the depth limit.
24233     //
24234     // FIXME: The depth check could be made to return the last non-aliasing
24235     // chain we found before we hit a tokenfactor rather than the original
24236     // chain.
24237     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
24238       Aliases.clear();
24239       Aliases.push_back(OriginalChain);
24240       return;
24241     }
24242 
24243     if (Chain.getOpcode() == ISD::TokenFactor) {
24244       // We have to check each of the operands of the token factor for "small"
24245       // token factors, so we queue them up.  Adding the operands to the queue
24246       // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE).
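      // For example, operands [A, B, C] are pushed as C, B, A and therefore
      // popped (visited) as A, B, C.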
24248       if (Chain.getNumOperands() > 16) {
24249         Aliases.push_back(Chain);
24250         continue;
24251       }
24252       for (unsigned n = Chain.getNumOperands(); n;)
24253         Chains.push_back(Chain.getOperand(--n));
24254       ++Depth;
24255       continue;
24256     }
    // Everything else.
    if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
24260       if (Chain.getNode())
24261         Chains.push_back(Chain);
24262       ++Depth;
24263       continue;
24264     }
    // No improved chain possible; treat it as an alias.
24266     Aliases.push_back(Chain);
24267   }
24268 }
24269 
/// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
24272 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24273   if (OptLevel == CodeGenOpt::None)
24274     return OldChain;
24275 
24276   // Ops for replacing token factor.
24277   SmallVector<SDValue, 8> Aliases;
24278 
24279   // Accumulate all the aliases to this node.
24280   GatherAllAliases(N, OldChain, Aliases);
24281 
24282   // If no operands then chain to entry token.
  if (Aliases.empty())
24284     return DAG.getEntryNode();
24285 
24286   // If a single operand then chain to it.  We don't need to revisit it.
24287   if (Aliases.size() == 1)
24288     return Aliases[0];
24289 
24290   // Construct a custom tailored token factor.
24291   return DAG.getTokenFactor(SDLoc(N), Aliases);
24292 }
24293 
24294 namespace {
// TODO: Replace with std::monostate when we move to C++17.
24296 struct UnitT { } Unit;
24297 bool operator==(const UnitT &, const UnitT &) { return true; }
24298 bool operator!=(const UnitT &, const UnitT &) { return false; }
24299 } // namespace
24300 
24301 // This function tries to collect a bunch of potentially interesting
24302 // nodes to improve the chains of, all at once. This might seem
24303 // redundant, as this function gets called when visiting every store
24304 // node, so why not let the work be done on each store as it's visited?
24305 //
24306 // I believe this is mainly important because mergeConsecutiveStores
24307 // is unable to deal with merging stores of different sizes, so unless
24308 // we improve the chains of all the potential candidates up-front
24309 // before running mergeConsecutiveStores, it might only see some of
24310 // the nodes that will eventually be candidates, and then not be able
24311 // to go from a partially-merged state to the desired final
24312 // fully-merged state.
24313 
24314 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24315   SmallVector<StoreSDNode *, 8> ChainedStores;
24316   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the immediately preceding address,
  // and is thus merged with the previous interval at insertion time.
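  // Intervals are half-open, and IntervalMap coalesces adjacent intervals
  // that map to the same value; since every interval here maps to Unit,
  // contiguous covered byte ranges collapse into a single interval.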
24320 
24321   using IMap =
24322       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
24323   IMap::Allocator A;
24324   IMap Intervals(A);
24325 
24326   // This holds the base pointer, index, and the offset in bytes from the base
24327   // pointer.
24328   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24329 
24330   // We must have a base and an offset.
24331   if (!BasePtr.getBase().getNode())
24332     return false;
24333 
24334   // Do not handle stores to undef base pointers.
24335   if (BasePtr.getBase().isUndef())
24336     return false;
24337 
  // Do not handle stores to opaque types.
24339   if (St->getMemoryVT().isZeroSized())
24340     return false;
24341 
24342   // BaseIndexOffset assumes that offsets are fixed-size, which
24343   // is not valid for scalable vectors where the offsets are
24344   // scaled by `vscale`, so bail out early.
24345   if (St->getMemoryVT().isScalableVector())
24346     return false;
24347 
  // Add St's interval.
24349   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24350 
24351   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24352     if (Chain->getMemoryVT().isScalableVector())
24353       return false;
24354 
24355     // If the chain has more than one use, then we can't reorder the mem ops.
24356     if (!SDValue(Chain, 0)->hasOneUse())
24357       break;
24358     // TODO: Relax for unordered atomics (see D66309)
24359     if (!Chain->isSimple() || Chain->isIndexed())
24360       break;
24361 
24362     // Find the base pointer and offset for this memory node.
24363     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24364     // Check that the base pointer is the same as the original one.
24365     int64_t Offset;
24366     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24367       break;
24368     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24369     // Make sure we don't overlap with other intervals by checking the ones to
24370     // the left or right before inserting.
24371     auto I = Intervals.find(Offset);
24372     // If there's a next interval, we should end before it.
24373     if (I != Intervals.end() && I.start() < (Offset + Length))
24374       break;
    // Any interval before I is necessarily disjoint from us, since find()
    // returns the first interval ending after Offset; if one exists, be
    // conservative and stop chaining rather than insert between intervals.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
24377       break;
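    // For example, with St covering [0, 4) and a 4-byte chained store at
    // Offset = -4: find(-4) returns [0, 4), the first check passes since
    // 0 < (-4 + 4) is false, I is begin(), and the inserted [-4, 0)
    // coalesces into [-4, 4).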
24378     Intervals.insert(Offset, Offset + Length, Unit);
24379 
24380     ChainedStores.push_back(Chain);
24381     STChain = Chain;
24382   }
24383 
24384   // If we didn't find a chained store, exit.
  if (ChainedStores.empty())
24386     return false;
24387 
  // Improve all chained stores (St and the members of ChainedStores) starting
  // from where the store chain ended, and return a single TokenFactor.
24390   SDValue NewChain = STChain->getChain();
24391   SmallVector<SDValue, 8> TFOps;
24392   for (unsigned I = ChainedStores.size(); I;) {
24393     StoreSDNode *S = ChainedStores[--I];
24394     SDValue BetterChain = FindBetterChain(S, NewChain);
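    // A StoreSDNode's operands are (chain, value, base pointer, offset);
    // only the chain operand is changed here.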
24395     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24396         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24397     TFOps.push_back(SDValue(S, 0));
24398     ChainedStores[I] = S;
24399   }
24400 
24401   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24402   SDValue BetterChain = FindBetterChain(St, NewChain);
24403   SDValue NewST;
24404   if (St->isTruncatingStore())
24405     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24406                               St->getBasePtr(), St->getMemoryVT(),
24407                               St->getMemOperand());
24408   else
24409     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24410                          St->getBasePtr(), St->getMemOperand());
24411 
24412   TFOps.push_back(NewST);
24413 
  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep the relative order consistent with FindBetterChain.
24417   auto hasImprovedChain = [&](SDValue ST) -> bool {
24418     return ST->getOperand(0) != NewChain;
24419   };
24420   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24421   if (AddNewChain)
24422     TFOps.insert(TFOps.begin(), NewChain);
24423 
24424   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24425   CombineTo(St, TF);
24426 
24427   // Add TF and its operands to the worklist.
24428   AddToWorklist(TF.getNode());
24429   for (const SDValue &Op : TF->ops())
24430     AddToWorklist(Op.getNode());
24431   AddToWorklist(STChain);
24432   return true;
24433 }
24434 
24435 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24436   if (OptLevel == CodeGenOpt::None)
24437     return false;
24438 
24439   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24440 
24441   // We must have a base and an offset.
24442   if (!BasePtr.getBase().getNode())
24443     return false;
24444 
24445   // Do not handle stores to undef base pointers.
24446   if (BasePtr.getBase().isUndef())
24447     return false;
24448 
24449   // Directly improve a chain of disjoint stores starting at St.
24450   if (parallelizeChainedStores(St))
24451     return true;
24452 
  // Improve St's chain.
24454   SDValue BetterChain = FindBetterChain(St, St->getChain());
24455   if (St->getChain() != BetterChain) {
24456     replaceStoreChain(St, BetterChain);
24457     return true;
24458   }
24459   return false;
24460 }
24461 
24462 /// This is the entry point for the file.
24463 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
24464                            CodeGenOpt::Level OptLevel) {
  // This is the main entry point to this class.
24466   DAGCombiner(*this, AA, OptLevel).Run(Level);
24467 }
24468