//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store sequences narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// last considered a new worklist entry. Because we do not add duplicate
    /// nodes to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in the dependence check. If we have
    /// seen the bail out for the same pair more than a limit number of times,
    /// we won't consider the StoreNode with the same RootNode as a store
    /// merging candidate again.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }
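
    // Illustrative example (no particular target assumed): if the widest
    // legal type is v4i32, the loop above leaves MaximumLegalStoreInBits at
    // 128. For a scalable type such as nxv4i32, only the known minimum size
    // (128 bits) is counted, since the runtime vscale multiple is not
    // guaranteed.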

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }
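
    // Usage sketch (hypothetical caller): when visiting (and X, 0xFF), a
    // combine can ask whether the node simplifies given the demanded bits.
    // If TargetLowering can prove, say, that X already has its high bits
    // clear, it records a replacement in TLO; the helper above then commits
    // that replacement and queues the node for another visit.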

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitAVG(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitFP_TO_BF16(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL,
                                                    SDNode *N,
                                                    SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildSREMPow2(SDNode *N);
    SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
                                     SDValue ConstNode);
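
    // Concrete instance of the fold above: (mul (add x, 3), 5) becomes
    // (add (mul x, 5), 15), trading an add-then-multiply for a
    // multiply-then-add with the constants pre-folded (3 * 5 = 15).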

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);
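
    // Illustrative example: four consecutive i8 stores of the constants
    // 1, 2, 3, 4 to p, p+1, p+2, p+3 can become a single i32 store of one
    // combined constant (whose byte order depends on endianness), provided
    // the wider store is legal for the target and sufficiently fast.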

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and, if they are not met, an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);
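
    // For example: (trunc:i16 (and:i32 X, 0xFF00)) -> (and:i16 (trunc X),
    // 0xFF00), letting the AND operate at the narrower width.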

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match. We provide an
// Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zext(Bits);
  RHS = RHS.zext(Bits);
}
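
// For instance, given an 8-bit LHS and a 16-bit RHS with Offset = 1, both
// are zero-extended to 17 bits, leaving one spare bit of headroom for a
// subsequent operation that might carry out of the top bit.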

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
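
// For example, on a target with ZeroOrOneBooleanContent,
// (select_cc LHS, RHS, 1, 0, CC) behaves exactly like (setcc LHS, RHS, CC),
// so it is reported as setcc-equivalent and LHS, RHS, and CC are handed
// back to the caller.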

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}
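
// Example: a v4i32 splat of the constant 0x0000FFFF is the mask for an i16
// scalar type, so a call with ScalarTy == MVT::i16 returns true, while a
// splat of 0x00FFFFFF matches none of the handled widths and returns false.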

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if this is an indexed load with an opaque target constant index.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDNode *N,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  // (load/store (add, (add, x, y), offset2)) ->
  // (load/store (add, (add, x, offset2), y)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C2)
    return false;

  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C2APIntVal.getSignificantBits() > 64)
    return false;

  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    if (N0.hasOneUse())
      return false;

    const APInt &C1APIntVal = C1->getAPIntValue();
    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
    if (CombinedValueIntVal.getSignificantBits() > 64)
      return false;
    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

    for (SDNode *Node : N->uses()) {
      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
        // Is x[offset2] already not a legal addressing mode? If so then
        // reassociating the constants breaks nothing (we test offset2 because
        // that's the one we hope to fold into the load or store).
        TargetLoweringBase::AddrMode AM;
        AM.HasBaseReg = true;
        AM.BaseOffs = C2APIntVal.getSExtValue();
        EVT VT = LoadStore->getMemoryVT();
        unsigned AS = LoadStore->getAddressSpace();
        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          continue;

        // Would x[offset1+offset2] still be a legal addressing mode?
        AM.BaseOffs = CombinedValue;
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          return true;
      }
    }
  } else {
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
        return false;

    for (SDNode *Node : N->uses()) {
      auto *LoadStore = dyn_cast<MemSDNode>(Node);
      if (!LoadStore)
        return false;

      // Is x[offset2] a legal addressing mode? If so then reassociating
      // the constants breaks the address pattern.
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return false;
    }
    return true;
  }

  return false;
}
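
// A hypothetical illustration: on a target whose load/store immediate offset
// field only reaches 255, folding offset1 = 200 and offset2 = 100 into
// x[300] would force the combined constant into a separate register, undoing
// the split that CodeGenPrepare created, so the function above returns true.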

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
        return DAG.getNode(Opc, DL, VT, N00, OpNode);
      return SDValue();
    }
    if (TLI.isReassocProfitable(DAG, N0, N1)) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
      return DAG.getNode(Opc, DL, VT, OpNode, N01);
    }
  }

  // Check for repeated operand logic simplifications.
  if (Opc == ISD::AND || Opc == ISD::OR) {
    // (N00 & N01) & N00 --> N00 & N01
    // (N00 & N01) & N01 --> N00 & N01
    // (N00 | N01) | N00 --> N00 | N01
    // (N00 | N01) | N01 --> N00 | N01
    if (N1 == N00 || N1 == N01)
      return N0;
  }
  if (Opc == ISD::XOR) {
    // (N00 ^ N01) ^ N00 --> N01
    if (N1 == N00)
      return N01;
    // (N00 ^ N01) ^ N01 --> N00
    if (N1 == N01)
      return N00;
  }

  if (TLI.isReassocProfitable(DAG, N0, N1)) {
    if (N1 != N01) {
      // Reassociate if (op N00, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // If (op (op N00, N1), N01) also already exists, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
      }
    }

    if (N1 != N00) {
      // Reassociate if (op N01, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // If (op (op N01, N1), N00) also already exists, we need to stop
        // reassociating to avoid an infinite loop.
        if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
          return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
      }
    }
  }

  return SDValue();
}
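
// A minimal example of the constant-operand case above:
// (add (add x, 1), 2) reassociates to (add x, 3), since both trailing
// operands are constants and FoldConstantArithmetic folds 1 + 2 directly.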

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");

  // Floating-point reassociation is not allowed without loose FP math.
  if (N0.getValueType().isFloatingPoint() ||
      N1.getValueType().isFloatingPoint())
    if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
      return SDValue();

  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}
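
// The floating-point guard above matters because reassociation can change
// results: with a = 1e20, b = -1e20, c = 1.0, (a + b) + c == 1.0 but
// a + (b + c) == 0.0 in double precision, so FP operands are reassociated
// only under the 'reassoc' and 'nsz' fast-math flags.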
1169 
1170 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1171                                bool AddTo) {
1172   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1173   ++NodesCombined;
1174   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1175              To[0].dump(&DAG);
1176              dbgs() << " and " << NumTo - 1 << " other values\n");
1177   for (unsigned i = 0, e = NumTo; i != e; ++i)
1178     assert((!To[i].getNode() ||
1179             N->getValueType(i) == To[i].getValueType()) &&
1180            "Cannot combine value to value of different type!");
1181 
1182   WorklistRemover DeadNodes(*this);
1183   DAG.ReplaceAllUsesWith(N, To);
1184   if (AddTo) {
1185     // Push the new nodes and any users onto the worklist
1186     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1187       if (To[i].getNode())
1188         AddToWorklistWithUsers(To[i].getNode());
1189     }
1190   }
1191 
1192   // Finally, if the node is now dead, remove it from the graph.  The node
1193   // may not be dead if the replacement process recursively simplified to
1194   // something else needing this node.
1195   if (N->use_empty())
1196     deleteAndRecombine(N);
1197   return SDValue(N, 0);
1198 }
1199 
1200 void DAGCombiner::
1201 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1202   // Replace the old value with the new one.
1203   ++NodesCombined;
1204   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1205              dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1206 
1207   // Replace all uses.  If any nodes become isomorphic to other nodes and
1208   // are deleted, make sure to remove them from our worklist.
1209   WorklistRemover DeadNodes(*this);
1210   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1211 
1212   // Push the new node and any (possibly new) users onto the worklist.
1213   AddToWorklistWithUsers(TLO.New.getNode());
1214 
1215   // Finally, if the node is now dead, remove it from the graph.  The node
1216   // may not be dead if the replacement process recursively simplified to
1217   // something else needing this node.
1218   if (TLO.Old->use_empty())
1219     deleteAndRecombine(TLO.Old.getNode());
1220 }
1221 
1222 /// Check the specified integer node value to see if it can be simplified or if
1223 /// things it uses can be simplified by bit propagation. If so, return true.
1224 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1225                                        const APInt &DemandedElts,
1226                                        bool AssumeSingleUse) {
1227   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1228   KnownBits Known;
1229   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1230                                 AssumeSingleUse))
1231     return false;
1232 
1233   // Revisit the node.
1234   AddToWorklist(Op.getNode());
1235 
1236   CommitTargetLoweringOpt(TLO);
1237   return true;
1238 }
1239 
1240 /// Check the specified vector node value to see if it can be simplified or
1241 /// if things it uses can be simplified as it only uses some of the elements.
1242 /// If so, return true.
1243 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1244                                              const APInt &DemandedElts,
1245                                              bool AssumeSingleUse) {
1246   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1247   APInt KnownUndef, KnownZero;
1248   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1249                                       TLO, 0, AssumeSingleUse))
1250     return false;
1251 
1252   // Revisit the node.
1253   AddToWorklist(Op.getNode());
1254 
1255   CommitTargetLoweringOpt(TLO);
1256   return true;
1257 }
1258 
1259 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1260   SDLoc DL(Load);
1261   EVT VT = Load->getValueType(0);
1262   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1263 
1264   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1265              Trunc.dump(&DAG); dbgs() << '\n');
1266   WorklistRemover DeadNodes(*this);
1267   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1268   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1269   deleteAndRecombine(Load);
1270   AddToWorklist(Trunc.getNode());
1271 }
1272 
1273 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1274   Replace = false;
1275   SDLoc DL(Op);
1276   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1277     LoadSDNode *LD = cast<LoadSDNode>(Op);
1278     EVT MemVT = LD->getMemoryVT();
1279     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1280                                                       : LD->getExtensionType();
1281     Replace = true;
1282     return DAG.getExtLoad(ExtType, DL, PVT,
1283                           LD->getChain(), LD->getBasePtr(),
1284                           MemVT, LD->getMemOperand());
1285   }
1286 
1287   unsigned Opc = Op.getOpcode();
1288   switch (Opc) {
1289   default: break;
1290   case ISD::AssertSext:
1291     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1292       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1293     break;
1294   case ISD::AssertZext:
1295     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1296       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1297     break;
1298   case ISD::Constant: {
1299     unsigned ExtOpc =
1300       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1301     return DAG.getNode(ExtOpc, DL, PVT, Op);
1302   }
1303   }
1304 
1305   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1306     return SDValue();
1307   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1308 }
1309 
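/// Promote Op to PVT, then sign-extend the promoted value back "in register"
/// from the original width, e.g. promoting i16 to i32 yields
/// (sign_extend_inreg (promoted x), i16).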
1310 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1311   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1312     return SDValue();
1313   EVT OldVT = Op.getValueType();
1314   SDLoc DL(Op);
1315   bool Replace = false;
1316   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1317   if (!NewOp.getNode())
1318     return SDValue();
1319   AddToWorklist(NewOp.getNode());
1320 
1321   if (Replace)
1322     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1323   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1324                      DAG.getValueType(OldVT));
1325 }
1326 
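/// Promote Op to PVT, then clear the bits above the original width (a
/// zero-extend "in register"), i.e. an AND with 0xFFFF when promoting i16 to
/// i32.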
1327 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1328   EVT OldVT = Op.getValueType();
1329   SDLoc DL(Op);
1330   bool Replace = false;
1331   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1332   if (!NewOp.getNode())
1333     return SDValue();
1334   AddToWorklist(NewOp.getNode());
1335 
1336   if (Replace)
1337     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1338   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1339 }
1340 
1341 /// Promote the specified integer binary operation if the target indicates it is
1342 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1343 /// i32 since i16 instructions are longer.
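/// For example, promoting i16 to i32 rewrites
///   (i16 add x, y) -> (trunc (i32 add (anyext x), (anyext y))).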
1344 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1345   if (!LegalOperations)
1346     return SDValue();
1347 
1348   EVT VT = Op.getValueType();
1349   if (VT.isVector() || !VT.isInteger())
1350     return SDValue();
1351 
  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1354   unsigned Opc = Op.getOpcode();
1355   if (TLI.isTypeDesirableForOp(Opc, VT))
1356     return SDValue();
1357 
1358   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1361   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1362     assert(PVT != VT && "Don't know what type to promote to!");
1363 
1364     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1365 
1366     bool Replace0 = false;
1367     SDValue N0 = Op.getOperand(0);
1368     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1369 
1370     bool Replace1 = false;
1371     SDValue N1 = Op.getOperand(1);
1372     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1373     SDLoc DL(Op);
1374 
1375     SDValue RV =
1376         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1377 
1378     // We are always replacing N0/N1's use in N and only need additional
1379     // replacements if there are additional uses.
1380     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1381     //       (SDValue) here because the node may reference multiple values
1382     //       (for example, the chain value of a load node).
1383     Replace0 &= !N0->hasOneUse();
1384     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1385 
1386     // Combine Op here so it is preserved past replacements.
1387     CombineTo(Op.getNode(), RV);
1388 
1389     // If operands have a use ordering, make sure we deal with
1390     // predecessor first.
1391     if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1392       std::swap(N0, N1);
1393       std::swap(NN0, NN1);
1394     }
1395 
1396     if (Replace0) {
1397       AddToWorklist(NN0.getNode());
1398       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1399     }
1400     if (Replace1) {
1401       AddToWorklist(NN1.getNode());
1402       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1403     }
1404     return Op;
1405   }
1406   return SDValue();
1407 }
1408 
1409 /// Promote the specified integer shift operation if the target indicates it is
1410 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1411 /// i32 since i16 instructions are longer.
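/// The value being shifted is zero-extended for SRL and sign-extended for SRA
/// so that the shifted-in bits are correct in the wider type, e.g.
///   (i16 srl x, c) -> (trunc (i32 srl (zext x), c)).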
1412 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1413   if (!LegalOperations)
1414     return SDValue();
1415 
1416   EVT VT = Op.getValueType();
1417   if (VT.isVector() || !VT.isInteger())
1418     return SDValue();
1419 
  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1422   unsigned Opc = Op.getOpcode();
1423   if (TLI.isTypeDesirableForOp(Opc, VT))
1424     return SDValue();
1425 
1426   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1429   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1430     assert(PVT != VT && "Don't know what type to promote to!");
1431 
1432     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1433 
1434     bool Replace = false;
1435     SDValue N0 = Op.getOperand(0);
1436     if (Opc == ISD::SRA)
1437       N0 = SExtPromoteOperand(N0, PVT);
1438     else if (Opc == ISD::SRL)
1439       N0 = ZExtPromoteOperand(N0, PVT);
1440     else
1441       N0 = PromoteOperand(N0, PVT, Replace);
1442 
1443     if (!N0.getNode())
1444       return SDValue();
1445 
1446     SDLoc DL(Op);
1447     SDValue N1 = Op.getOperand(1);
1448     SDValue RV =
1449         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1450 
1451     if (Replace)
1452       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1453 
1454     // Deal with Op being deleted.
1455     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1456       return RV;
1457   }
1458   return SDValue();
1459 }
1460 
1461 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1462   if (!LegalOperations)
1463     return SDValue();
1464 
1465   EVT VT = Op.getValueType();
1466   if (VT.isVector() || !VT.isInteger())
1467     return SDValue();
1468 
  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1471   unsigned Opc = Op.getOpcode();
1472   if (TLI.isTypeDesirableForOp(Opc, VT))
1473     return SDValue();
1474 
1475   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1478   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1479     assert(PVT != VT && "Don't know what type to promote to!");
1480     // fold (aext (aext x)) -> (aext x)
1481     // fold (aext (zext x)) -> (zext x)
1482     // fold (aext (sext x)) -> (sext x)
1483     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1484     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1485   }
1486   return SDValue();
1487 }
1488 
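/// Promote an unindexed integer load if the target indicates a wider type is
/// desirable, e.g. (i16 load p) -> (trunc (i32 extload p)).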
1489 bool DAGCombiner::PromoteLoad(SDValue Op) {
1490   if (!LegalOperations)
1491     return false;
1492 
1493   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1494     return false;
1495 
1496   EVT VT = Op.getValueType();
1497   if (VT.isVector() || !VT.isInteger())
1498     return false;
1499 
  // If the operation's type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
1502   unsigned Opc = Op.getOpcode();
1503   if (TLI.isTypeDesirableForOp(Opc, VT))
1504     return false;
1505 
1506   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1509   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1510     assert(PVT != VT && "Don't know what type to promote to!");
1511 
1512     SDLoc DL(Op);
1513     SDNode *N = Op.getNode();
1514     LoadSDNode *LD = cast<LoadSDNode>(N);
1515     EVT MemVT = LD->getMemoryVT();
1516     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1517                                                       : LD->getExtensionType();
1518     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1519                                    LD->getChain(), LD->getBasePtr(),
1520                                    MemVT, LD->getMemOperand());
1521     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1522 
1523     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1524                Result.dump(&DAG); dbgs() << '\n');
1525     WorklistRemover DeadNodes(*this);
1526     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1527     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1528     deleteAndRecombine(N);
1529     AddToWorklist(Result.getNode());
1530     return true;
1531   }
1532   return false;
1533 }
1534 
1535 /// Recursively delete a node which has no uses and any operands for
1536 /// which it is the only use.
1537 ///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1541 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1542   if (!N->use_empty())
1543     return false;
1544 
1545   SmallSetVector<SDNode *, 16> Nodes;
1546   Nodes.insert(N);
1547   do {
1548     N = Nodes.pop_back_val();
1549     if (!N)
1550       continue;
1551 
1552     if (N->use_empty()) {
1553       for (const SDValue &ChildN : N->op_values())
1554         Nodes.insert(ChildN.getNode());
1555 
1556       removeFromWorklist(N);
1557       DAG.DeleteNode(N);
1558     } else {
1559       AddToWorklist(N);
1560     }
1561   } while (!Nodes.empty());
1562   return true;
1563 }
1564 
1565 //===----------------------------------------------------------------------===//
1566 //  Main DAG Combiner implementation
1567 //===----------------------------------------------------------------------===//
1568 
1569 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables so that the various visit routines may use
  // them.
1571   Level = AtLevel;
1572   LegalDAG = Level >= AfterLegalizeDAG;
1573   LegalOperations = Level >= AfterLegalizeVectorOps;
1574   LegalTypes = Level >= AfterLegalizeTypes;
1575 
1576   WorklistInserter AddNodes(*this);
1577 
1578   // Add all the dag nodes to the worklist.
1579   for (SDNode &Node : DAG.allnodes())
1580     AddToWorklist(&Node);
1581 
  // Create a dummy node (which is not added to allnodes) that adds a
  // reference to the root node, preventing it from being deleted, and
  // tracking any changes of the root.
1585   HandleSDNode Dummy(DAG.getRoot());
1586 
1587   // While we have a valid worklist entry node, try to combine it.
1588   while (SDNode *N = getNextWorklistEntry()) {
1589     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1590     // N is deleted from the DAG, since they too may now be dead or may have a
1591     // reduced number of uses, allowing other xforms.
1592     if (recursivelyDeleteUnusedNodes(N))
1593       continue;
1594 
1595     WorklistRemover DeadNodes(*this);
1596 
1597     // If this combine is running after legalizing the DAG, re-legalize any
1598     // nodes pulled off the worklist.
1599     if (LegalDAG) {
1600       SmallSetVector<SDNode *, 16> UpdatedNodes;
1601       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1602 
1603       for (SDNode *LN : UpdatedNodes)
1604         AddToWorklistWithUsers(LN);
1605 
1606       if (!NIsValid)
1607         continue;
1608     }
1609 
1610     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1611 
    // Add any operands of the current node that have not yet been combined to
    // the worklist as well. Because the worklist uniques things already, this
1614     // won't repeatedly process the same operand.
1615     CombinedNodes.insert(N);
1616     for (const SDValue &ChildN : N->op_values())
1617       if (!CombinedNodes.count(ChildN.getNode()))
1618         AddToWorklist(ChildN.getNode());
1619 
1620     SDValue RV = combine(N);
1621 
1622     if (!RV.getNode())
1623       continue;
1624 
1625     ++NodesCombined;
1626 
1627     // If we get back the same node we passed in, rather than a new node or
1628     // zero, we know that the node must have defined multiple values and
1629     // CombineTo was used.  Since CombineTo takes care of the worklist
1630     // mechanics for us, we have no work to do in this case.
1631     if (RV.getNode() == N)
1632       continue;
1633 
1634     assert(N->getOpcode() != ISD::DELETED_NODE &&
1635            RV.getOpcode() != ISD::DELETED_NODE &&
1636            "Node was deleted but visit returned new node!");
1637 
1638     LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1639 
1640     if (N->getNumValues() == RV->getNumValues())
1641       DAG.ReplaceAllUsesWith(N, RV.getNode());
1642     else {
1643       assert(N->getValueType(0) == RV.getValueType() &&
1644              N->getNumValues() == 1 && "Type mismatch");
1645       DAG.ReplaceAllUsesWith(N, &RV);
1646     }
1647 
1648     // Push the new node and any users onto the worklist.  Omit this if the
1649     // new node is the EntryToken (e.g. if a store managed to get optimized
1650     // out), because re-visiting the EntryToken and its users will not uncover
1651     // any additional opportunities, but there may be a large number of such
1652     // users, potentially causing compile time explosion.
1653     if (RV.getOpcode() != ISD::EntryToken) {
1654       AddToWorklist(RV.getNode());
1655       AddUsersToWorklist(RV.getNode());
1656     }
1657 
1658     // Finally, if the node is now dead, remove it from the graph.  The node
1659     // may not be dead if the replacement process recursively simplified to
1660     // something else needing this node. This will also take care of adding any
1661     // operands which have lost a user to the worklist.
1662     recursivelyDeleteUnusedNodes(N);
1663   }
1664 
  // If the root changed (e.g. it was a dead load), update the root.
1666   DAG.setRoot(Dummy.getValue());
1667   DAG.RemoveDeadNodes();
1668 }
1669 
1670 SDValue DAGCombiner::visit(SDNode *N) {
1671   switch (N->getOpcode()) {
1672   default: break;
1673   case ISD::TokenFactor:        return visitTokenFactor(N);
1674   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1675   case ISD::ADD:                return visitADD(N);
1676   case ISD::SUB:                return visitSUB(N);
1677   case ISD::SADDSAT:
1678   case ISD::UADDSAT:            return visitADDSAT(N);
1679   case ISD::SSUBSAT:
1680   case ISD::USUBSAT:            return visitSUBSAT(N);
1681   case ISD::ADDC:               return visitADDC(N);
1682   case ISD::SADDO:
1683   case ISD::UADDO:              return visitADDO(N);
1684   case ISD::SUBC:               return visitSUBC(N);
1685   case ISD::SSUBO:
1686   case ISD::USUBO:              return visitSUBO(N);
1687   case ISD::ADDE:               return visitADDE(N);
1688   case ISD::ADDCARRY:           return visitADDCARRY(N);
1689   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1690   case ISD::SUBE:               return visitSUBE(N);
1691   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1692   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1693   case ISD::SMULFIX:
1694   case ISD::SMULFIXSAT:
1695   case ISD::UMULFIX:
1696   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1697   case ISD::MUL:                return visitMUL(N);
1698   case ISD::SDIV:               return visitSDIV(N);
1699   case ISD::UDIV:               return visitUDIV(N);
1700   case ISD::SREM:
1701   case ISD::UREM:               return visitREM(N);
1702   case ISD::MULHU:              return visitMULHU(N);
1703   case ISD::MULHS:              return visitMULHS(N);
1704   case ISD::AVGFLOORS:
1705   case ISD::AVGFLOORU:
1706   case ISD::AVGCEILS:
1707   case ISD::AVGCEILU:           return visitAVG(N);
1708   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1709   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1710   case ISD::SMULO:
1711   case ISD::UMULO:              return visitMULO(N);
1712   case ISD::SMIN:
1713   case ISD::SMAX:
1714   case ISD::UMIN:
1715   case ISD::UMAX:               return visitIMINMAX(N);
1716   case ISD::AND:                return visitAND(N);
1717   case ISD::OR:                 return visitOR(N);
1718   case ISD::XOR:                return visitXOR(N);
1719   case ISD::SHL:                return visitSHL(N);
1720   case ISD::SRA:                return visitSRA(N);
1721   case ISD::SRL:                return visitSRL(N);
1722   case ISD::ROTR:
1723   case ISD::ROTL:               return visitRotate(N);
1724   case ISD::FSHL:
1725   case ISD::FSHR:               return visitFunnelShift(N);
1726   case ISD::SSHLSAT:
1727   case ISD::USHLSAT:            return visitSHLSAT(N);
1728   case ISD::ABS:                return visitABS(N);
1729   case ISD::BSWAP:              return visitBSWAP(N);
1730   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1731   case ISD::CTLZ:               return visitCTLZ(N);
1732   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1733   case ISD::CTTZ:               return visitCTTZ(N);
1734   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1735   case ISD::CTPOP:              return visitCTPOP(N);
1736   case ISD::SELECT:             return visitSELECT(N);
1737   case ISD::VSELECT:            return visitVSELECT(N);
1738   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1739   case ISD::SETCC:              return visitSETCC(N);
1740   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1741   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1742   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1743   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1744   case ISD::AssertSext:
1745   case ISD::AssertZext:         return visitAssertExt(N);
1746   case ISD::AssertAlign:        return visitAssertAlign(N);
1747   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1748   case ISD::SIGN_EXTEND_VECTOR_INREG:
1749   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
1750   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1751   case ISD::BITCAST:            return visitBITCAST(N);
1752   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1753   case ISD::FADD:               return visitFADD(N);
1754   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1755   case ISD::FSUB:               return visitFSUB(N);
1756   case ISD::FMUL:               return visitFMUL(N);
1757   case ISD::FMA:                return visitFMA(N);
1758   case ISD::FDIV:               return visitFDIV(N);
1759   case ISD::FREM:               return visitFREM(N);
1760   case ISD::FSQRT:              return visitFSQRT(N);
1761   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1762   case ISD::FPOW:               return visitFPOW(N);
1763   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1764   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1765   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1766   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1767   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1768   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1769   case ISD::FNEG:               return visitFNEG(N);
1770   case ISD::FABS:               return visitFABS(N);
1771   case ISD::FFLOOR:             return visitFFLOOR(N);
1772   case ISD::FMINNUM:
1773   case ISD::FMAXNUM:
1774   case ISD::FMINIMUM:
1775   case ISD::FMAXIMUM:           return visitFMinMax(N);
1776   case ISD::FCEIL:              return visitFCEIL(N);
1777   case ISD::FTRUNC:             return visitFTRUNC(N);
1778   case ISD::BRCOND:             return visitBRCOND(N);
1779   case ISD::BR_CC:              return visitBR_CC(N);
1780   case ISD::LOAD:               return visitLOAD(N);
1781   case ISD::STORE:              return visitSTORE(N);
1782   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1783   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1784   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1785   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1786   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1787   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1788   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1789   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1790   case ISD::MGATHER:            return visitMGATHER(N);
1791   case ISD::MLOAD:              return visitMLOAD(N);
1792   case ISD::MSCATTER:           return visitMSCATTER(N);
1793   case ISD::MSTORE:             return visitMSTORE(N);
1794   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1795   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1796   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1797   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
1798   case ISD::FREEZE:             return visitFREEZE(N);
1799   case ISD::VECREDUCE_FADD:
1800   case ISD::VECREDUCE_FMUL:
1801   case ISD::VECREDUCE_ADD:
1802   case ISD::VECREDUCE_MUL:
1803   case ISD::VECREDUCE_AND:
1804   case ISD::VECREDUCE_OR:
1805   case ISD::VECREDUCE_XOR:
1806   case ISD::VECREDUCE_SMAX:
1807   case ISD::VECREDUCE_SMIN:
1808   case ISD::VECREDUCE_UMAX:
1809   case ISD::VECREDUCE_UMIN:
1810   case ISD::VECREDUCE_FMAX:
1811   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1812 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
1813 #include "llvm/IR/VPIntrinsics.def"
1814     return visitVPOp(N);
1815   }
1816   return SDValue();
1817 }
1818 
1819 SDValue DAGCombiner::combine(SDNode *N) {
1820   SDValue RV;
1821   if (!DisableGenericCombines)
1822     RV = visit(N);
1823 
1824   // If nothing happened, try a target-specific DAG combine.
1825   if (!RV.getNode()) {
1826     assert(N->getOpcode() != ISD::DELETED_NODE &&
1827            "Node was deleted but visit returned NULL!");
1828 
1829     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1830         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1831 
1832       // Expose the DAG combiner to the target combiner impls.
1833       TargetLowering::DAGCombinerInfo
1834         DagCombineInfo(DAG, Level, false, this);
1835 
1836       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1837     }
1838   }
1839 
  // If still nothing happened, try promoting the operation.
1841   if (!RV.getNode()) {
1842     switch (N->getOpcode()) {
1843     default: break;
1844     case ISD::ADD:
1845     case ISD::SUB:
1846     case ISD::MUL:
1847     case ISD::AND:
1848     case ISD::OR:
1849     case ISD::XOR:
1850       RV = PromoteIntBinOp(SDValue(N, 0));
1851       break;
1852     case ISD::SHL:
1853     case ISD::SRA:
1854     case ISD::SRL:
1855       RV = PromoteIntShiftOp(SDValue(N, 0));
1856       break;
1857     case ISD::SIGN_EXTEND:
1858     case ISD::ZERO_EXTEND:
1859     case ISD::ANY_EXTEND:
1860       RV = PromoteExtend(SDValue(N, 0));
1861       break;
1862     case ISD::LOAD:
1863       if (PromoteLoad(SDValue(N, 0)))
1864         RV = SDValue(N, 0);
1865       break;
1866     }
1867   }
1868 
1869   // If N is a commutative binary node, try to eliminate it if the commuted
1870   // version is already present in the DAG.
1871   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1872       N->getNumValues() == 1) {
1873     SDValue N0 = N->getOperand(0);
1874     SDValue N1 = N->getOperand(1);
1875 
1876     // Constant operands are canonicalized to RHS.
1877     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1878       SDValue Ops[] = {N1, N0};
1879       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1880                                             N->getFlags());
1881       if (CSENode)
1882         return SDValue(CSENode, 0);
1883     }
1884   }
1885 
1886   return RV;
1887 }
1888 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
1891 static SDValue getInputChainForNode(SDNode *N) {
1892   if (unsigned NumOps = N->getNumOperands()) {
1893     if (N->getOperand(0).getValueType() == MVT::Other)
1894       return N->getOperand(0);
1895     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1896       return N->getOperand(NumOps-1);
1897     for (unsigned i = 1; i < NumOps-1; ++i)
1898       if (N->getOperand(i).getValueType() == MVT::Other)
1899         return N->getOperand(i);
1900   }
1901   return SDValue();
1902 }
1903 
1904 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1905   // If N has two operands, where one has an input chain equal to the other,
1906   // the 'other' chain is redundant.
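  // For example, in (TokenFactor (load Ch, ...), Ch) the load's output chain
  // already orders after Ch, so the extra Ch operand is redundant and the
  // TokenFactor can be replaced by the load's chain value.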
1907   if (N->getNumOperands() == 2) {
1908     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1909       return N->getOperand(0);
1910     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1911       return N->getOperand(1);
1912   }
1913 
1914   // Don't simplify token factors if optnone.
1915   if (OptLevel == CodeGenOpt::None)
1916     return SDValue();
1917 
1918   // Don't simplify the token factor if the node itself has too many operands.
1919   if (N->getNumOperands() > TokenFactorInlineLimit)
1920     return SDValue();
1921 
1922   // If the sole user is a token factor, we should make sure we have a
1923   // chance to merge them together. This prevents TF chains from inhibiting
1924   // optimizations.
1925   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1926     AddToWorklist(*(N->use_begin()));
1927 
1928   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1929   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1930   SmallPtrSet<SDNode*, 16> SeenOps;
1931   bool Changed = false;             // If we should replace this token factor.
1932 
1933   // Start out with this token factor.
1934   TFs.push_back(N);
1935 
  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
1938   for (unsigned i = 0; i < TFs.size(); ++i) {
1939     // Limit number of nodes to inline, to avoid quadratic compile times.
1940     // We have to add the outstanding Token Factors to Ops, otherwise we might
1941     // drop Ops from the resulting Token Factors.
1942     if (Ops.size() > TokenFactorInlineLimit) {
1943       for (unsigned j = i; j < TFs.size(); j++)
1944         Ops.emplace_back(TFs[j], 0);
1945       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1946       // combiner worklist later.
1947       TFs.resize(i);
1948       break;
1949     }
1950 
1951     SDNode *TF = TFs[i];
1952     // Check each of the operands.
1953     for (const SDValue &Op : TF->op_values()) {
1954       switch (Op.getOpcode()) {
1955       case ISD::EntryToken:
1956         // Entry tokens don't need to be added to the list. They are
1957         // redundant.
1958         Changed = true;
1959         break;
1960 
1961       case ISD::TokenFactor:
1962         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1963           // Queue up for processing.
1964           TFs.push_back(Op.getNode());
1965           Changed = true;
1966           break;
1967         }
1968         LLVM_FALLTHROUGH;
1969 
1970       default:
1971         // Only add if it isn't already in the list.
1972         if (SeenOps.insert(Op.getNode()).second)
1973           Ops.push_back(Op);
1974         else
1975           Changed = true;
1976         break;
1977       }
1978     }
1979   }
1980 
1981   // Re-visit inlined Token Factors, to clean them up in case they have been
1982   // removed. Skip the first Token Factor, as this is the current node.
1983   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1984     AddToWorklist(TFs[i]);
1985 
  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.
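  // For example, if Ops contains both a load L and a store whose input chain
  // is L, the store already orders after L, so L is redundant and can be
  // pruned from the replacement Token Factor.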
1991 
1992   // List of nodes to search through and original Ops from which they originate.
1993   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1994   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1995   SmallPtrSet<SDNode *, 16> SeenChains;
1996   bool DidPruneOps = false;
1997 
1998   unsigned NumLeftToConsider = 0;
1999   for (const SDValue &Op : Ops) {
2000     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2001     OpWorkCount.push_back(1);
2002   }
2003 
2004   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as now originating from the current OpNumber.
2007     if (SeenOps.contains(Op)) {
2008       Changed = true;
2009       DidPruneOps = true;
2010       unsigned OrigOpNumber = 0;
2011       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2012         OrigOpNumber++;
2013       assert((OrigOpNumber != Ops.size()) &&
2014              "expected to find TokenFactor Operand");
2015       // Re-mark worklist from OrigOpNumber to OpNumber
2016       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2017         if (Worklist[i].second == OrigOpNumber) {
2018           Worklist[i].second = OpNumber;
2019         }
2020       }
2021       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2022       OpWorkCount[OrigOpNumber] = 0;
2023       NumLeftToConsider--;
2024     }
2025     // Add if it's a new chain
2026     if (SeenChains.insert(Op).second) {
2027       OpWorkCount[OpNumber]++;
2028       Worklist.push_back(std::make_pair(Op, OpNumber));
2029     }
2030   };
2031 
2032   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops for any pruning to be possible.
2034     if (NumLeftToConsider <= 1)
2035       break;
2036     auto CurNode = Worklist[i].first;
2037     auto CurOpNumber = Worklist[i].second;
2038     assert((OpWorkCount[CurOpNumber] > 0) &&
2039            "Node should not appear in worklist");
2040     switch (CurNode->getOpcode()) {
2041     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent this operand from
      // being marked as considered.
2046       NumLeftToConsider++;
2047       break;
2048     case ISD::TokenFactor:
2049       for (const SDValue &Op : CurNode->op_values())
2050         AddToWorklist(i, Op.getNode(), CurOpNumber);
2051       break;
2052     case ISD::LIFETIME_START:
2053     case ISD::LIFETIME_END:
2054     case ISD::CopyFromReg:
2055     case ISD::CopyToReg:
2056       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2057       break;
2058     default:
2059       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2060         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2061       break;
2062     }
2063     OpWorkCount[CurOpNumber]--;
2064     if (OpWorkCount[CurOpNumber] == 0)
2065       NumLeftToConsider--;
2066   }
2067 
2068   // If we've changed things around then replace token factor.
2069   if (Changed) {
2070     SDValue Result;
2071     if (Ops.empty()) {
2072       // The entry token is the only possible outcome.
2073       Result = DAG.getEntryNode();
2074     } else {
2075       if (DidPruneOps) {
2076         SmallVector<SDValue, 8> PrunedOps;
2078         for (const SDValue &Op : Ops) {
2079           if (SeenChains.count(Op.getNode()) == 0)
2080             PrunedOps.push_back(Op);
2081         }
2082         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2083       } else {
2084         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2085       }
2086     }
2087     return Result;
2088   }
2089   return SDValue();
2090 }
2091 
2092 /// MERGE_VALUES can always be eliminated.
2093 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2094   WorklistRemover DeadNodes(*this);
2095   // Replacing results may cause a different MERGE_VALUES to suddenly
2096   // be CSE'd with N, and carry its uses with it. Iterate until no
2097   // uses remain, to ensure that the node can be safely deleted.
2098   // First add the users of this node to the work list so that they
2099   // can be tried again once they have new operands.
2100   AddUsersToWorklist(N);
2101   do {
2102     // Do as a single replacement to avoid rewalking use lists.
2103     SmallVector<SDValue, 8> Ops;
2104     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2105       Ops.push_back(N->getOperand(i));
2106     DAG.ReplaceAllUsesWith(N, Ops.data());
2107   } while (!N->use_empty());
2108   deleteAndRecombine(N);
2109   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2110 }
2111 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
2114 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2115   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2116   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2117 }
2118 
2119 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2120 /// and that N may be folded in the load / store addressing mode.
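/// For example, (add x, 8) used as the base pointer of a load can fold into a
/// [reg + imm] addressing mode if the target reports that mode as legal for
/// the loaded type and address space.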
2121 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2122                                     const TargetLowering &TLI) {
2123   EVT VT;
2124   unsigned AS;
2125 
2126   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2127     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2128       return false;
2129     VT = LD->getMemoryVT();
2130     AS = LD->getAddressSpace();
2131   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2132     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2133       return false;
2134     VT = ST->getMemoryVT();
2135     AS = ST->getAddressSpace();
2136   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2137     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2138       return false;
2139     VT = LD->getMemoryVT();
2140     AS = LD->getAddressSpace();
2141   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2142     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2143       return false;
2144     VT = ST->getMemoryVT();
2145     AS = ST->getAddressSpace();
2146   } else {
2147     return false;
2148   }
2149 
2150   TargetLowering::AddrMode AM;
2151   if (N->getOpcode() == ISD::ADD) {
2152     AM.HasBaseReg = true;
2153     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2154     if (Offset)
2155       // [reg +/- imm]
2156       AM.BaseOffs = Offset->getSExtValue();
2157     else
2158       // [reg +/- reg]
2159       AM.Scale = 1;
2160   } else if (N->getOpcode() == ISD::SUB) {
2161     AM.HasBaseReg = true;
2162     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2163     if (Offset)
2164       // [reg +/- imm]
2165       AM.BaseOffs = -Offset->getSExtValue();
2166     else
2167       // [reg +/- reg]
2168       AM.Scale = 1;
2169   } else {
2170     return false;
2171   }
2172 
2173   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2174                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2175 }
2176 
2177 /// This inverts a canonicalization in IR that replaces a variable select arm
2178 /// with an identity constant. Codegen improves if we re-use the variable
2179 /// operand rather than load a constant. This can also be converted into a
2180 /// masked vector operation if the target supports it.
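/// For example, with FMUL (identity 1.0):
///   (fmul X, (vselect Cond, 1.0, Y)) --> (vselect Cond, X, (fmul X, Y))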
2181 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2182                                               bool ShouldCommuteOperands) {
2183   // Match a select as operand 1. The identity constant that we are looking for
2184   // is only valid as operand 1 of a non-commutative binop.
2185   SDValue N0 = N->getOperand(0);
2186   SDValue N1 = N->getOperand(1);
2187   if (ShouldCommuteOperands)
2188     std::swap(N0, N1);
2189 
2190   // TODO: Should this apply to scalar select too?
2191   if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT)
2192     return SDValue();
2193 
2194   unsigned Opcode = N->getOpcode();
2195   EVT VT = N->getValueType(0);
2196   SDValue Cond = N1.getOperand(0);
2197   SDValue TVal = N1.getOperand(1);
2198   SDValue FVal = N1.getOperand(2);
2199 
2200   // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity().
2201   // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()".
2202   // TODO: With fast-math (NSZ), allow the opposite-sign form of zero?
2203   auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) {
2204     if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) {
2205       switch (Opcode) {
2206       case ISD::FADD: // X + -0.0 --> X
2207         return C->isZero() && C->isNegative();
2208       case ISD::FSUB: // X - 0.0 --> X
2209         return C->isZero() && !C->isNegative();
2210       case ISD::FMUL: // X * 1.0 --> X
2211       case ISD::FDIV: // X / 1.0 --> X
2212         return C->isExactlyValue(1.0);
2213       }
2214     }
2215     if (ConstantSDNode *C = isConstOrConstSplat(V)) {
2216       switch (Opcode) {
2217       case ISD::ADD: // X + 0 --> X
2218       case ISD::SUB: // X - 0 --> X
2219       case ISD::SHL: // X << 0 --> X
2220       case ISD::SRA: // X s>> 0 --> X
2221       case ISD::SRL: // X u>> 0 --> X
2222         return C->isZero();
2223       case ISD::MUL: // X * 1 --> X
2224         return C->isOne();
2225       }
2226     }
2227     return false;
2228   };
2229 
2230   // This transform increases uses of N0, so freeze it to be safe.
2231   // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2232   if (isIdentityConstantForOpcode(Opcode, TVal)) {
2233     SDValue F0 = DAG.getFreeze(N0);
2234     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2235     return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2236   }
2237   // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2238   if (isIdentityConstantForOpcode(Opcode, FVal)) {
2239     SDValue F0 = DAG.getFreeze(N0);
2240     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2241     return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2242   }
2243 
2244   return SDValue();
2245 }
2246 
2247 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2248   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2249          "Unexpected binary operator");
2250 
2251   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2252   auto BinOpcode = BO->getOpcode();
2253   EVT VT = BO->getValueType(0);
2254   if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2255     if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2256       return Sel;
2257 
2258     if (TLI.isCommutativeBinOp(BO->getOpcode()))
2259       if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2260         return Sel;
2261   }
2262 
2263   // Don't do this unless the old select is going away. We want to eliminate the
2264   // binary operator, not replace a binop with a select.
2265   // TODO: Handle ISD::SELECT_CC.
2266   unsigned SelOpNo = 0;
2267   SDValue Sel = BO->getOperand(0);
2268   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2269     SelOpNo = 1;
2270     Sel = BO->getOperand(1);
2271   }
2272 
2273   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2274     return SDValue();
2275 
2276   SDValue CT = Sel.getOperand(1);
2277   if (!isConstantOrConstantVector(CT, true) &&
2278       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2279     return SDValue();
2280 
2281   SDValue CF = Sel.getOperand(2);
2282   if (!isConstantOrConstantVector(CF, true) &&
2283       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2284     return SDValue();
2285 
2286   // Bail out if any constants are opaque because we can't constant fold those.
2287   // The exception is "and" and "or" with either 0 or -1 in which case we can
  // propagate non-constant operands into the select, i.e.:
2289   // and (select Cond, 0, -1), X --> select Cond, 0, X
2290   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2291   bool CanFoldNonConst =
2292       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2293       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2294       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2295 
2296   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2297   if (!CanFoldNonConst &&
2298       !isConstantOrConstantVector(CBO, true) &&
2299       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2300     return SDValue();
2301 
2302   // We have a select-of-constants followed by a binary operator with a
2303   // constant. Eliminate the binop by pulling the constant math into the select.
2304   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2305   SDLoc DL(Sel);
2306   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2307                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2308   if (!CanFoldNonConst && !NewCT.isUndef() &&
2309       !isConstantOrConstantVector(NewCT, true) &&
2310       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2311     return SDValue();
2312 
2313   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2314                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2315   if (!CanFoldNonConst && !NewCF.isUndef() &&
2316       !isConstantOrConstantVector(NewCF, true) &&
2317       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2318     return SDValue();
2319 
2320   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2321   SelectOp->setFlags(BO->getFlags());
2322   return SelectOp;
2323 }
2324 
2325 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2326   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2327          "Expecting add or sub");
2328 
2329   // Match a constant operand and a zext operand for the math instruction:
2330   // add Z, C
2331   // sub C, Z
2332   bool IsAdd = N->getOpcode() == ISD::ADD;
2333   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2334   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2335   auto *CN = dyn_cast<ConstantSDNode>(C);
2336   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2337     return SDValue();
2338 
2339   // Match the zext operand as a setcc of a boolean.
2340   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2341       Z.getOperand(0).getValueType() != MVT::i1)
2342     return SDValue();
2343 
2344   // Match the compare as: setcc (X & 1), 0, eq.
2345   SDValue SetCC = Z.getOperand(0);
2346   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2347   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2348       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2349       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2350     return SDValue();
2351 
2352   // We are adding/subtracting a constant and an inverted low bit. Turn that
2353   // into a subtract/add of the low bit with incremented/decremented constant:
2354   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2355   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2356   EVT VT = C.getValueType();
2357   SDLoc DL(N);
2358   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2359   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2360                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2361   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2362 }
2363 
/// Try to fold an add/sub of a constant with a shifted 'not' of the sign bit
/// into a shift and an add with a different constant.
2366 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2367   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2368          "Expecting add or sub");
2369 
2370   // We need a constant operand for the add/sub, and the other operand is a
2371   // logical shift right: add (srl), C or sub C, (srl).
2372   bool IsAdd = N->getOpcode() == ISD::ADD;
2373   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2374   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2375   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2376       ShiftOp.getOpcode() != ISD::SRL)
2377     return SDValue();
2378 
2379   // The shift must be of a 'not' value.
2380   SDValue Not = ShiftOp.getOperand(0);
2381   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2382     return SDValue();
2383 
2384   // The shift must be moving the sign bit to the least-significant-bit.
2385   EVT VT = ShiftOp.getValueType();
2386   SDValue ShAmt = ShiftOp.getOperand(1);
2387   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2388   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2389     return SDValue();
2390 
2391   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2392   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2393   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2394   SDLoc DL(N);
2395   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2396   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2397   if (SDValue NewC =
2398           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2399                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2400     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2401   return SDValue();
2402 }
2403 
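/// Return true if \p V behaves like an ADD: an OR whose operands share no
/// common set bits, or an XOR with the minimum signed constant (flipping the
/// sign bit is equivalent to adding it, since the carry out of the top bit
/// is discarded).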
2404 static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
2405   unsigned Opcode = V.getOpcode();
2406   if (Opcode == ISD::OR)
2407     return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1));
2408   if (Opcode == ISD::XOR)
2409     return isMinSignedConstant(V.getOperand(1));
2410   return false;
2411 }
2412 
2413 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2414 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2415 /// are no common bits set in the operands).
2416 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2417   SDValue N0 = N->getOperand(0);
2418   SDValue N1 = N->getOperand(1);
2419   EVT VT = N0.getValueType();
2420   SDLoc DL(N);
2421 
2422   // fold (add x, undef) -> undef
2423   if (N0.isUndef())
2424     return N0;
2425   if (N1.isUndef())
2426     return N1;
2427 
2428   // fold (add c1, c2) -> c1+c2
2429   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2430     return C;
2431 
2432   // canonicalize constant to RHS
2433   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2434       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2435     return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2436 
2437   // fold vector ops
2438   if (VT.isVector()) {
2439     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2440       return FoldedVOp;
2441 
2442     // fold (add x, 0) -> x, vector edition
2443     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2444       return N0;
2445   }
2446 
2447   // fold (add x, 0) -> x
2448   if (isNullConstant(N1))
2449     return N0;
2450 
2451   if (N0.getOpcode() == ISD::SUB) {
2452     SDValue N00 = N0.getOperand(0);
2453     SDValue N01 = N0.getOperand(1);
2454 
2455     // fold ((A-c1)+c2) -> (A+(c2-c1))
2456     if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2457       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2458 
2459     // fold ((c1-A)+c2) -> (c1+c2)-A
2460     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2461       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2462   }
2463 
2464   // add (sext i1 X), 1 -> zext (not i1 X)
2465   // We don't transform this pattern:
2466   //   add (zext i1 X), -1 -> sext (not i1 X)
2467   // because most (?) targets generate better code for the zext form.
2468   if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2469       isOneOrOneSplat(N1)) {
2470     SDValue X = N0.getOperand(0);
2471     if ((!LegalOperations ||
2472          (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2473           TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2474         X.getScalarValueSizeInBits() == 1) {
2475       SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2476       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2477     }
2478   }
2479 
2480   // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2481   // iff (or x, c0) is equivalent to (add x, c0).
2482   // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2483   // iff (xor x, c0) is equivalent to (add x, c0).
2484   if (isADDLike(N0, DAG)) {
2485     SDValue N01 = N0.getOperand(1);
2486     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2487       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2488   }
2489 
2490   if (SDValue NewSel = foldBinOpIntoSelect(N))
2491     return NewSel;
2492 
2493   // reassociate add
2494   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2495     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2496       return RADD;
2497 
    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
    // equivalent to (add x, c).
    // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
    // equivalent to (add x, c).
2502     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2503       if (isADDLike(N0, DAG) && N0.hasOneUse() &&
2504           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2505         return DAG.getNode(ISD::ADD, DL, VT,
2506                            DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2507                            N0.getOperand(1));
2508       }
2509       return SDValue();
2510     };
2511     if (SDValue Add = ReassociateAddOr(N0, N1))
2512       return Add;
2513     if (SDValue Add = ReassociateAddOr(N1, N0))
2514       return Add;
2515   }
2516   // fold ((0-A) + B) -> B-A
2517   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2518     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2519 
2520   // fold (A + (0-B)) -> A-B
2521   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2522     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2523 
2524   // fold (A+(B-A)) -> B
2525   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2526     return N1.getOperand(0);
2527 
2528   // fold ((B-A)+A) -> B
2529   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2530     return N0.getOperand(0);
2531 
2532   // fold ((A-B)+(C-A)) -> (C-B)
2533   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2534       N0.getOperand(0) == N1.getOperand(1))
2535     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2536                        N0.getOperand(1));
2537 
2538   // fold ((A-B)+(B-C)) -> (A-C)
2539   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2540       N0.getOperand(1) == N1.getOperand(0))
2541     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2542                        N1.getOperand(1));
2543 
2544   // fold (A+(B-(A+C))) to (B-C)
2545   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2546       N0 == N1.getOperand(1).getOperand(0))
2547     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2548                        N1.getOperand(1).getOperand(1));
2549 
2550   // fold (A+(B-(C+A))) to (B-C)
2551   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2552       N0 == N1.getOperand(1).getOperand(1))
2553     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2554                        N1.getOperand(1).getOperand(0));
2555 
2556   // fold (A+((B-A)+or-C)) to (B+or-C)
2557   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2558       N1.getOperand(0).getOpcode() == ISD::SUB &&
2559       N0 == N1.getOperand(0).getOperand(1))
2560     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2561                        N1.getOperand(1));
2562 
2563   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2564   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2565       N0->hasOneUse() && N1->hasOneUse()) {
2566     SDValue N00 = N0.getOperand(0);
2567     SDValue N01 = N0.getOperand(1);
2568     SDValue N10 = N1.getOperand(0);
2569     SDValue N11 = N1.getOperand(1);
2570 
2571     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2572       return DAG.getNode(ISD::SUB, DL, VT,
2573                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2574                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2575   }
2576 
2577   // fold (add (umax X, C), -C) --> (usubsat X, C)
2578   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2579     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2580       return (!Max && !Op) ||
2581              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2582     };
2583     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2584                                   /*AllowUndefs*/ true))
2585       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2586                          N0.getOperand(1));
2587   }
2588 
2589   if (SimplifyDemandedBits(SDValue(N, 0)))
2590     return SDValue(N, 0);
2591 
2592   if (isOneOrOneSplat(N1)) {
2593     // fold (add (xor a, -1), 1) -> (sub 0, a)
2594     if (isBitwiseNot(N0))
2595       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2596                          N0.getOperand(0));
2597 
2598     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2599     if (N0.getOpcode() == ISD::ADD) {
2600       SDValue A, Xor;
2601 
2602       if (isBitwiseNot(N0.getOperand(0))) {
2603         A = N0.getOperand(1);
2604         Xor = N0.getOperand(0);
2605       } else if (isBitwiseNot(N0.getOperand(1))) {
2606         A = N0.getOperand(0);
2607         Xor = N0.getOperand(1);
2608       }
2609 
2610       if (Xor)
2611         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2612     }
2613 
2614     // Look for:
2615     //   add (add x, y), 1
2616     // And if the target does not like this form then turn into:
2617     //   sub y, (xor x, -1)
2618     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2619         N0.hasOneUse()) {
2620       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2621                                 DAG.getAllOnesConstant(DL, VT));
2622       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2623     }
2624   }
2625 
2626   // (x - y) + -1  ->  add (xor y, -1), x
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  if (SDValue Combined = visitADDLike(N))
    return Combined;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
  if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
    const APInt &C0 = N0->getConstantOperandAPInt(0);
    const APInt &C1 = N1->getConstantOperandAPInt(0);
    return DAG.getVScale(DL, VT, C0 + C1);
  }

  // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
  if ((N0.getOpcode() == ISD::ADD) &&
      (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
      (N1.getOpcode() == ISD::VSCALE)) {
    const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    const APInt &VS1 = N1->getConstantOperandAPInt(0);
    SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
  }

  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
  if (N0.getOpcode() == ISD::STEP_VECTOR &&
      N1.getOpcode() == ISD::STEP_VECTOR) {
    const APInt &C0 = N0->getConstantOperandAPInt(0);
    const APInt &C1 = N1->getConstantOperandAPInt(0);
    APInt NewStep = C0 + C1;
    return DAG.getStepVector(DL, VT, NewStep);
  }

  // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
  if ((N0.getOpcode() == ISD::ADD) &&
      (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
      (N1.getOpcode() == ISD::STEP_VECTOR)) {
    const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
    const APInt &SV1 = N1->getConstantOperandAPInt(0);
    APInt NewStep = SV0 + SV1;
    SDValue SV = DAG.getStepVector(DL, VT, NewStep);
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (add_sat x, undef) -> -1
  if (N0.isUndef() || N1.isUndef())
    return DAG.getAllOnesConstant(DL, VT);

  // fold (add_sat c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(Opcode, DL, VT, N1, N0);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (add_sat x, 0) -> x, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (add_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // If it cannot overflow, transform into an add.
  if (Opcode == ISD::UADDSAT)
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADD, DL, VT, N0, N1);

  return SDValue();
}

static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  EVT VT = V->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0
  // or 1 and not some other value.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
          TargetLoweringBase::ZeroOrOneBooleanContent)
    return V;

  return SDValue();
}

/// Given the operands of an add/sub operation, see if the 2nd operand is a
/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
/// the opcode and bypass the mask operation.
static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
                                 SelectionDAG &DAG, const SDLoc &DL) {
  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
    return SDValue();

  EVT VT = N0.getValueType();
  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
    return SDValue();

  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
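  // (X is all sign bits, so X is 0 or -1 while the masked value is 0 or 1;
  //  adding 0/1 is therefore the same as subtracting 0/-1.)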
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}

/// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
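  // (shifting a negation left equals negating the shifted value:
  //  (0 - y) << n == 0 - (y << n))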
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
    return V;

  // Look for:
  //   add (add x, 1), y
  // And if the target does not like this form then turn into:
  //   sub y, (xor x, -1)
  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
      N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
  }

  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
    // Hoist one-use subtraction by non-opaque constant:
    //   (x - C) + y  ->  (x + y) - C
    // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
    if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
      return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
    }
    // Hoist one-use subtraction from non-opaque constant:
    //   (C - x) + y  ->  (y - x) + C
    if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
    }
  }

  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
  // rather than 'add 0/-1' (the zext should get folded).
  // add (sext i1 Y), X --> sub X, (zext i1 Y)
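  // (sext i1 Y is 0 or -1 and zext i1 Y is 0 or 1, so adding the former is
  //  the same as subtracting the latter)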
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
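  // ((sextinreg Y i1) evaluates to 0 or -1 while (and Y 1) evaluates to 0 or
  //  1, so the add becomes a sub of the masked value)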
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), N0,
                         DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
 * the flip also occurs if computing the inverse is the same cost.
 * This function returns an empty SDValue in case it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a
 * boolean no matter what, use DAG.getLogicalNOT.
 */
static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
                                  const TargetLowering &TLI,
                                  bool Force) {
  if (Force && isa<ConstantSDNode>(V))
    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());

  if (V.getOpcode() != ISD::XOR)
    return SDValue();

  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
  if (!Const)
    return SDValue();

  EVT VT = V.getValueType();

  bool IsFlip = false;
  switch (TLI.getBooleanContents(VT)) {
    case TargetLowering::ZeroOrOneBooleanContent:
      IsFlip = Const->isOne();
      break;
    case TargetLowering::ZeroOrNegativeOneBooleanContent:
      IsFlip = Const->isAllOnes();
      break;
    case TargetLowering::UndefinedBooleanContent:
      IsFlip = (Const->getAPIntValue() & 0x01) == 1;
      break;
  }

  if (IsFlip)
    return V.getOperand(0);
  if (Force)
    return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
  return SDValue();
}

SDValue DAGCombiner::visitADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SADDO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);

  // fold (addo x, 0) -> x + no carry out
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  if (!IsSigned) {
    // If it cannot overflow, transform into an add.
    if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
      return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                       DAG.getConstant(0, DL, CarryVT));

    // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
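    // (~a + 1 == 0 - a; the uaddo carries exactly when a == 0, which is
    //  precisely when the usubo does not borrow, hence the carry flip)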
    if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
      SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
                                DAG.getConstant(0, DL, VT), N0.getOperand(0));
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

    if (SDValue Combined = visitUADDOLike(N0, N1, N))
      return Combined;

    if (SDValue Combined = visitUADDOLike(N1, N0, N))
      return Combined;
  }

  return SDValue();
}

SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N0.getValueType();
  if (VT.isVector())
    return SDValue();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
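  // (If Y + 1 cannot overflow, the inner addcarry can never produce a carry
  //  of its own, so the only carry-out left to compute is the one from
  //  adding X, which is exactly what the combined addcarry produces.)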
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
    if (SDValue Carry = getAsCarry(TLI, N1))
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                         DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
  }

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}

SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (saddo_carry x, y, false) -> (saddo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
      return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
  }

  return SDValue();
}

/**
 * If we are facing some sort of diamond carry propagation pattern try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
 *
 * Patterns typically look something like
 *            (uaddo A, B)
 *             /       \
 *          Carry      Sum
 *            |          \
 *            | (addcarry *, 0, Z)
 *            |       /
 *             \   Carry
 *              |   /
 * (addcarry X, *, *)
 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
 * produce a combine with a single path for carry propagation.
 */
static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
                                      SDValue X, SDValue Carry0, SDValue Carry1,
                                      SDNode *N) {
  if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
    return SDValue();
  if (Carry1.getOpcode() != ISD::UADDO)
    return SDValue();

  SDValue Z;

  /**
   * First look for a suitable Z. It will present itself in the form of
   * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
   */
  if (Carry0.getOpcode() == ISD::ADDCARRY &&
      isNullConstant(Carry0.getOperand(1))) {
    Z = Carry0.getOperand(2);
  } else if (Carry0.getOpcode() == ISD::UADDO &&
             isOneConstant(Carry0.getOperand(1))) {
    EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
    Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
  } else {
    // We couldn't find a suitable Z.
    return SDValue();
  }

  auto cancelDiamond = [&](SDValue A, SDValue B) {
    SDLoc DL(N);
    SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
    Combiner.AddToWorklist(NewY.getNode());
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
                       DAG.getConstant(0, DL, X.getValueType()),
                       NewY.getValue(1));
  };

  /**
   *      (uaddo A, B)
   *           |
   *          Sum
   *           |
   * (addcarry *, 0, Z)
   */
  if (Carry0.getOperand(0) == Carry1.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
  }

  /**
   * (addcarry A, 0, Z)
   *         |
   *        Sum
   *         |
   *  (uaddo *, B)
   */
  if (Carry1.getOperand(0) == Carry0.getValue(0)) {
    return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
  }

  if (Carry1.getOperand(1) == Carry0.getValue(0)) {
    return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
  }

  return SDValue();
}

// If we are facing some sort of diamond carry/borrow in/out pattern try to
// match patterns like:
//
//          (uaddo A, B)            CarryIn
//            |  \                     |
//            |   \                    |
//    PartialSum   PartialCarryOutX   /
//            |        |             /
//            |    ____|____________/
//            |   /    |
//     (uaddo *, *)    \________
//       |  \                   \
//       |   \                   |
//       |    PartialCarryOutY   |
//       |        \              |
//       |         \            /
//   AddCarrySum    |    ______/
//                  |   /
//   CarryOut = (or *, *)
//
// And generate ADDCARRY (or SUBCARRY) with two result values:
//
//    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
//
// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
// a single path for carry/borrow out propagation:
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
                                   SDValue N0, SDValue N1, SDNode *N) {
  SDValue Carry0 = getAsCarry(TLI, N0);
  if (!Carry0)
    return SDValue();
  SDValue Carry1 = getAsCarry(TLI, N1);
  if (!Carry1)
    return SDValue();

  unsigned Opcode = Carry0.getOpcode();
  if (Opcode != Carry1.getOpcode())
    return SDValue();
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return SDValue();

  // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
  // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
  if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
    std::swap(Carry0, Carry1);

  // Check if nodes are connected in expected way.
  if (Carry1.getOperand(0) != Carry0.getValue(0) &&
      Carry1.getOperand(1) != Carry0.getValue(0))
    return SDValue();

  // The carry in value must be on the right-hand side for subtraction.
  unsigned CarryInOperandNum =
      Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
  if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
    return SDValue();
  SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);

  unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
    return SDValue();

  // Verify that the carry/borrow in is plausibly a carry/borrow bit.
  // TODO: make getAsCarry() aware of how partial carries are merged.
  if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
    return SDValue();
  CarryIn = CarryIn.getOperand(0);
  if (CarryIn.getValueType() != MVT::i1)
    return SDValue();

  SDLoc DL(N);
  SDValue Merged =
      DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
                  Carry0.getOperand(1), CarryIn);

  // Because we have proven that the result of the UADDO/USUBO of A and B
  // feeds into the UADDO/USUBO that does the carry/borrow in, if the first
  // UADDO/USUBO overflows, the second one cannot. For example, consider
  // 8-bit numbers where 0xFF is the maximum value.
  //
  //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
  //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
  //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags; and that AND can return a constant zero.
  //
  // TODO: match other operations that can merge flags (ADD, etc)
  DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
  if (N->getOpcode() == ISD::AND)
    return DAG.getConstant(0, DL, MVT::i1);
  return Merged.getValue(1);
}

SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                       SDNode *N) {
  // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
  if (isBitwiseNot(N0))
    if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
      SDLoc DL(N);
      SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
                                N0.getOperand(0), NotC);
      return CombineTo(
          N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
    }

  // Iff the flag result is dead:
  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
  // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
  // or the dependency between the instructions.
  if ((N0.getOpcode() == ISD::ADD ||
       (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
        N0.getValue(1) != CarryIn)) &&
      isNullConstant(N1) && !N->hasAnyUseOfValue(1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
                       N0.getOperand(0), N0.getOperand(1), CarryIn);

  /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation. In that case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
  if (auto Y = getAsCarry(TLI, N1)) {
    // Because both are carries, Y and Z can be swapped.
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
      return R;
    if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
      return R;
  }

  return SDValue();
}

// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
                                   SDValue RHS, SelectionDAG &DAG,
                                   const SDLoc &DL) {
  assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
         "Illegal truncation");

  if (DstVT == SrcVT)
    return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);

  // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
  // clamping RHS.
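  // (e.g. for SrcVT=i32, DstVT=i16: if the upper 16 bits of LHS are zero,
  //  the i32 usubsat result always fits in 16 bits, and clamping RHS to
  //  0xFFFF preserves both the saturate-to-zero and the in-range cases)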
  APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
                                          DstVT.getScalarSizeInBits());
  if (!DAG.MaskedValueIsZero(LHS, UpperBits))
    return SDValue();

  SDValue SatLimit =
      DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
                                           DstVT.getScalarSizeInBits()),
                      DL, SrcVT);
  RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
  RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
  LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
  return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
}

// Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
// usubsat(a,b), optionally as a truncated type.
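// (umax(a,b) - b is 0 when a <= b and a - b otherwise, which is exactly
//  usubsat(a,b); a - umin(a,b) computes the same value)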
SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
  if (N->getOpcode() != ISD::SUB ||
      !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
    return SDValue();

  EVT SubVT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be
  // converted to usubsat(a,b).
  if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
    SDValue MaxLHS = Op0.getOperand(0);
    SDValue MaxRHS = Op0.getOperand(1);
    if (MaxLHS == Op1)
      return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
    if (MaxRHS == Op1)
      return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
  }

  if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
    SDValue MinLHS = Op1.getOperand(0);
    SDValue MinRHS = Op1.getOperand(1);
    if (MinLHS == Op0)
      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
    if (MinRHS == Op0)
      return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
  }

  // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
  if (Op1.getOpcode() == ISD::TRUNCATE &&
      Op1.getOperand(0).getOpcode() == ISD::UMIN &&
      Op1.getOperand(0).hasOneUse()) {
    SDValue MinLHS = Op1.getOperand(0).getOperand(0);
    SDValue MinRHS = Op1.getOperand(0).getOperand(1);
    if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
                                 DAG, SDLoc(N));
    if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
      return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
                                 DAG, SDLoc(N));
  }

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  auto PeekThroughFreeze = [](SDValue N) {
    if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
      return N->getOperand(0);
    return N;
  };

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);

  // fold (sub c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      return N0;
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  if (isNullOrNullSplat(N0)) {
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
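    // (for i32, X >>u 31 is 0 or 1 while X >>s 31 is 0 or -1; negating one
    //  form yields the other)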
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }

    // Convert 0 - abs(x).
    if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
        !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
      if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
        return Result;

    // Fold neg(splat(neg(x))) -> splat(x)
    if (VT.isVector()) {
      SDValue N1S = DAG.getSplatValue(N1, true);
      if (N1S && N1S.getOpcode() == ISD::SUB &&
          isNullConstant(N1S.getOperand(0))) {
        if (VT.isScalableVector())
          return DAG.getSplatVector(VT, DL, N1S.getOperand(1));
        return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1));
      }
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
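  // (x + ~x == -1 in two's complement, so -1 - x == ~x)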
  if (isAllOnesOrAllOnesSplat(N0))
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold (A - (0-B)) -> A+B
  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold (A+C1)-C2 -> A+(C1-C2)
  if (N0.getOpcode() == ISD::ADD) {
    SDValue N01 = N0.getOperand(1);
    if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
  }

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
  }

  // fold (A-C1)-C2 -> A-(C1+C2)
  if (N0.getOpcode() == ISD::SUB) {
    SDValue N01 = N0.getOperand(1);
    if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
  }

  // fold (c1-A)-c2 -> (c1-c2)-A
  if (N0.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold (A-(B-C)) -> A+(C-B)
  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
                                   N1.getOperand(0)));

  // A - (A & B)  ->  A & (~B)
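  // (A & B only has bits that are also set in A, so the subtraction cannot
  //  borrow and simply clears those bits: A - (A & B) == A & ~B)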
  if (N1.getOpcode() == ISD::AND) {
    SDValue A = N1.getOperand(0);
    SDValue B = N1.getOperand(1);
    if (A != N0)
      std::swap(A, B);
    if (A == N0 &&
        (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
      SDValue InvB =
          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
      return DAG.getNode(ISD::AND, DL, VT, A, InvB);
    }
  }

  // fold (X - (-Y * Z)) -> (X + (Y * Z))
  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0).getOperand(1),
                                N1.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
        isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
                                N1.getOperand(0),
                                N1.getOperand(1).getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
    }
  }

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
    return V;

  if (SDValue V = foldAddSubOfSignBit(N, DAG))
    return V;

  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
    return V;

  if (SDValue V = foldSubToUSubSat(VT, N))
    return V;

  // (x - y) - 1  ->  add (xor y, -1), x
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
    SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                              DAG.getAllOnesConstant(DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
  }

  // Look for:
  //   sub y, (xor x, -1)
  // And if the target does not like this form then turn into:
  //   add (add x, y), 1
  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
  }

  // Hoist one-use addition by non-opaque constant:
  //   (x + C) - y  ->  (x - y) + C
  if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
  }
  // y - (x + C)  ->  (y - x) - C
  if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
      isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
  }
  // (x - C) - y  ->  (x - y) - C
  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
  }
  // (C - x) - y  ->  C - (x + y)
  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
      isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
  }

  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
  // rather than 'sub 0/1' (the sext should get folded).
  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
      N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
      TLI.getBooleanContents(VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
  }

  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
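  // (Y is 0 or -1, i.e. the sign of X broadcast to every bit; xoring with Y
  //  conditionally complements X and subtracting Y conditionally adds 1,
  //  which is exactly the two's-complement absolute value)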
  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
      SDValue S0 = N1.getOperand(0);
      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
          if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
    }
  }

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
  if (N1.getOpcode() == ISD::VSCALE) {
    const APInt &IntVal = N1.getConstantOperandAPInt(0);
    return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
  }

  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
    APInt NewStep = -N1.getConstantOperandAPInt(0);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getStepVector(DL, VT, NewStep));
  }

  // Prefer an add for more folding potential and possibly better codegen:
  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
    SDValue ShAmt = N1.getOperand(1);
    ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
    if (ShAmtC &&
        ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
      SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
      return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
    }
  }

  // As with the previous fold, prefer add for more folding potential.
  // Subtracting SMIN/0 is the same as adding SMIN/0:
  // N0 - (X << BW-1) --> N0 + (X << BW-1)
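  // (X << (BW-1) is either 0 or SMIN, and SMIN is its own negation in
  //  two's complement, so subtracting it equals adding it)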
  if (N1.getOpcode() == ISD::SHL) {
    ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
    if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
  }

  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
    if (SDValue Carry = getAsCarry(TLI, N0)) {
      SDValue X = N1;
      SDValue Zero = DAG.getConstant(0, DL, VT);
      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
      return DAG.getNode(ISD::ADDCARRY, DL,
                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
                         Carry);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (sub_sat x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (sub_sat x, x) -> 0
  if (N0 == N1)
    return DAG.getConstant(0, DL, VT);

  // fold (sub_sat c1, c2) -> c3
  if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
    return C;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    // fold (sub_sat x, 0) -> x, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub_sat x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into a SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  bool IsSigned = (ISD::SSUBO == N->getOpcode());

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into a SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // fold (subo x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getConstant(0, DL, CarryVT));

  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

  // fold (subo x, c) -> (addo x, -c)
  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
    return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // fold (subo x, 0) -> x + no borrow
  if (isNullOrNullSplat(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getConstant(0, DL, CarryVT));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (subcarry x, y, false) -> (usubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (ssubo_carry x, y, false) -> (ssubo x, y)
  if (isNullConstant(CarryIn)) {
    if (!LegalOperations ||
        TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
      return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
  }

  return SDValue();
}

// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
// UMULFIXSAT here.
SDValue DAGCombiner::visitMULFIX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue Scale = N->getOperand(2);
  EVT VT = N0.getValueType();

  // fold (mulfix x, undef, scale) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // Canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);

  // fold (mulfix x, 0, scale) -> 0
  if (isNullConstant(N1))
    return DAG.getConstant(0, SDLoc(N), VT);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, DL, VT);

  // fold (mul c1, c2) -> c1*c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, DL, VT, N1, N0);

  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  APInt ConstValue1;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
      return FoldedVOp;

    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
    assert((!N1IsConst ||
            ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
           "Splat APInt should be element width");
  } else {
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1.isZero())
    return N1;

  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1.isOne())
    return N0;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnes())
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);

  // fold (mul x, (1 << c)) -> x << c
  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
      DAG.isKnownToBeAPowerOfTwo(N1) &&
      (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
    SDValue LogBase2 = BuildLogBase2(N1, DL);
    EVT ShiftVT = getShiftAmountTy(N0.getValueType());
    SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
    return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
  }

  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  // Try to transform:
  // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
  // mul x, (2^N + 1) --> add (shl x, N), x
  // mul x, (2^N - 1) --> sub (shl x, N), x
  // Examples: x * 33 --> (x << 5) + x
  //           x * 15 --> (x << 4) - x
  //           x * -33 --> -((x << 5) + x)
  //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
  // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
  // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
  // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
  // Examples: x * 0x8800 --> (x << 15) + (x << 11)
  //           x * 0xf800 --> (x << 16) - (x << 11)
  //           x * -0x8800 --> -((x << 15) + (x << 11))
  //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
    // TODO: We could handle more general decomposition of any constant by
    //       having the target set a limit on number of ops and making a
    //       callback to determine that sequence (similar to sqrt expansion).
    unsigned MathOp = ISD::DELETED_NODE;
    APInt MulC = ConstValue1.abs();
    // The constant `2` should be treated as (2^0 + 1).
4034     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
4035     MulC.lshrInPlace(TZeros);
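    // e.g. for x * 0x8800: MulC = 0x8800 and TZeros = 11, so MulC becomes
    // 0x11 (17); 17 - 1 = 16 is a power of 2, giving MathOp = ISD::ADD and
    // ShAmt = 4 + 11 = 15, i.e. (x << 15) + (x << 11) as in the example above.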
4036     if ((MulC - 1).isPowerOf2())
4037       MathOp = ISD::ADD;
4038     else if ((MulC + 1).isPowerOf2())
4039       MathOp = ISD::SUB;
4040 
4041     if (MathOp != ISD::DELETED_NODE) {
4042       unsigned ShAmt =
4043           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4044       ShAmt += TZeros;
4045       assert(ShAmt < VT.getScalarSizeInBits() &&
4046              "multiply-by-constant generated out of bounds shift");
4047       SDValue Shl =
4048           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4049       SDValue R =
4050           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4051                                DAG.getNode(ISD::SHL, DL, VT, N0,
4052                                            DAG.getConstant(TZeros, DL, VT)))
4053                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4054       if (ConstValue1.isNegative())
4055         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
4056       return R;
4057     }
4058   }
4059 
4060   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
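  // e.g. (mul (shl X, 2), 5) --> (mul X, 20)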
4061   if (N0.getOpcode() == ISD::SHL) {
4062     SDValue N01 = N0.getOperand(1);
4063     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4064       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4065   }
4066 
4067   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4068   // use.
4069   {
4070     SDValue Sh, Y;
4071 
4072     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
4073     if (N0.getOpcode() == ISD::SHL &&
4074         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4075       Sh = N0; Y = N1;
4076     } else if (N1.getOpcode() == ISD::SHL &&
4077                isConstantOrConstantVector(N1.getOperand(1)) &&
4078                N1->hasOneUse()) {
4079       Sh = N1; Y = N0;
4080     }
4081 
4082     if (Sh.getNode()) {
4083       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4084       return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4085     }
4086   }
4087 
4088   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4089   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4090       N0.getOpcode() == ISD::ADD &&
4091       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4092       isMulAddWithConstProfitable(N, N0, N1))
4093     return DAG.getNode(
4094         ISD::ADD, DL, VT,
4095         DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4096         DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4097 
4098   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4099   if (N0.getOpcode() == ISD::VSCALE)
4100     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
4101       const APInt &C0 = N0.getConstantOperandAPInt(0);
4102       const APInt &C1 = NC1->getAPIntValue();
4103       return DAG.getVScale(DL, VT, C0 * C1);
4104     }
4105 
4106   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4107   APInt MulVal;
4108   if (N0.getOpcode() == ISD::STEP_VECTOR)
4109     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4110       const APInt &C0 = N0.getConstantOperandAPInt(0);
4111       APInt NewStep = C0 * MulVal;
4112       return DAG.getStepVector(DL, VT, NewStep);
4113     }
4114 
  // Fold (mul x, 0/undef) -> 0 and
  //      (mul x, 1) -> x
  // into and(x, mask).
  // We can replace vectors with '0' and '1' factors with a clearing mask.
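  // e.g. mul X, <1, 0, undef, 1> --> and X, <-1, 0, 0, -1>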
4119   if (VT.isFixedLengthVector()) {
4120     unsigned NumElts = VT.getVectorNumElements();
4121     SmallBitVector ClearMask;
4122     ClearMask.reserve(NumElts);
4123     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4124       if (!V || V->isZero()) {
4125         ClearMask.push_back(true);
4126         return true;
4127       }
4128       ClearMask.push_back(false);
4129       return V->isOne();
4130     };
4131     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4132         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4133       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4134       EVT LegalSVT = N1.getOperand(0).getValueType();
4135       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4136       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4137       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4138       for (unsigned I = 0; I != NumElts; ++I)
4139         if (ClearMask[I])
4140           Mask[I] = Zero;
4141       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4142     }
4143   }
4144 
4145   // reassociate mul
4146   if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4147     return RMUL;
4148 
4149   return SDValue();
4150 }
4151 
/// Return true if a divmod libcall is available.
4153 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4154                                      const TargetLowering &TLI) {
4155   RTLIB::Libcall LC;
4156   EVT NodeType = Node->getValueType(0);
4157   if (!NodeType.isSimple())
4158     return false;
4159   switch (NodeType.getSimpleVT().SimpleTy) {
4160   default: return false; // No libcall for vector types.
4161   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4162   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4163   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4164   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4165   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4166   }
4167 
4168   return TLI.getLibcallName(LC) != nullptr;
4169 }
4170 
4171 /// Issue divrem if both quotient and remainder are needed.
4172 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4173   if (Node->use_empty())
4174     return SDValue(); // This is a dead node, leave it alone.
4175 
4176   unsigned Opcode = Node->getOpcode();
4177   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4178   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4179 
  // DivMod lib calls can still work on non-legal types, since they are
  // lowered to lib calls.
4181   EVT VT = Node->getValueType(0);
4182   if (VT.isVector() || !VT.isInteger())
4183     return SDValue();
4184 
4185   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4186     return SDValue();
4187 
4188   // If DIVREM is going to get expanded into a libcall,
4189   // but there is no libcall available, then don't combine.
4190   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4191       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4192     return SDValue();
4193 
  // If div is legal, it's better to do the normal expansion.
4195   unsigned OtherOpcode = 0;
4196   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4197     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4198     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4199       return SDValue();
4200   } else {
4201     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4202     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4203       return SDValue();
4204   }
4205 
4206   SDValue Op0 = Node->getOperand(0);
4207   SDValue Op1 = Node->getOperand(1);
4208   SDValue combined;
4209   for (SDNode *User : Op0->uses()) {
4210     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4211         User->use_empty())
4212       continue;
4213     // Convert the other matching node(s), too;
4214     // otherwise, the DIVREM may get target-legalized into something
4215     // target-specific that we won't be able to recognize.
4216     unsigned UserOpc = User->getOpcode();
4217     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4218         User->getOperand(0) == Op0 &&
4219         User->getOperand(1) == Op1) {
4220       if (!combined) {
4221         if (UserOpc == OtherOpcode) {
4222           SDVTList VTs = DAG.getVTList(VT, VT);
4223           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4224         } else if (UserOpc == DivRemOpc) {
4225           combined = SDValue(User, 0);
4226         } else {
4227           assert(UserOpc == Opcode);
4228           continue;
4229         }
4230       }
4231       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4232         CombineTo(User, combined);
4233       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4234         CombineTo(User, combined.getValue(1));
4235     }
4236   }
4237   return combined;
4238 }
4239 
4240 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4241   SDValue N0 = N->getOperand(0);
4242   SDValue N1 = N->getOperand(1);
4243   EVT VT = N->getValueType(0);
4244   SDLoc DL(N);
4245 
4246   unsigned Opc = N->getOpcode();
4247   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4248   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4249 
4250   // X / undef -> undef
4251   // X % undef -> undef
4252   // X / 0 -> undef
4253   // X % 0 -> undef
4254   // NOTE: This includes vectors where any divisor element is zero/undef.
4255   if (DAG.isUndef(Opc, {N0, N1}))
4256     return DAG.getUNDEF(VT);
4257 
4258   // undef / X -> 0
4259   // undef % X -> 0
4260   if (N0.isUndef())
4261     return DAG.getConstant(0, DL, VT);
4262 
4263   // 0 / X -> 0
4264   // 0 % X -> 0
4265   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4266   if (N0C && N0C->isZero())
4267     return N0;
4268 
4269   // X / X -> 1
4270   // X % X -> 0
4271   if (N0 == N1)
4272     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4273 
4274   // X / 1 -> X
4275   // X % 1 -> 0
4276   // If this is a boolean op (single-bit element type), we can't have
4277   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4278   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4279   // it's a 1.
4280   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4281     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4282 
4283   return SDValue();
4284 }
4285 
4286 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4287   SDValue N0 = N->getOperand(0);
4288   SDValue N1 = N->getOperand(1);
4289   EVT VT = N->getValueType(0);
4290   EVT CCVT = getSetCCResultType(VT);
4291   SDLoc DL(N);
4292 
4293   // fold (sdiv c1, c2) -> c1/c2
4294   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4295     return C;
4296 
4297   // fold vector ops
4298   if (VT.isVector())
4299     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4300       return FoldedVOp;
4301 
4302   // fold (sdiv X, -1) -> 0-X
4303   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4304   if (N1C && N1C->isAllOnes())
4305     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4306 
4307   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4308   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4309     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4310                          DAG.getConstant(1, DL, VT),
4311                          DAG.getConstant(0, DL, VT));
4312 
4313   if (SDValue V = simplifyDivRem(N, DAG))
4314     return V;
4315 
4316   if (SDValue NewSel = foldBinOpIntoSelect(N))
4317     return NewSel;
4318 
4319   // If we know the sign bits of both operands are zero, strength reduce to a
4320   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
4321   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4322     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4323 
4324   if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4327     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4328                                               { N0, N1 })) {
4329       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4330       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4331       AddToWorklist(Mul.getNode());
4332       AddToWorklist(Sub.getNode());
4333       CombineTo(RemNode, Sub);
4334     }
4335     return V;
4336   }
4337 
4338   // sdiv, srem -> sdivrem
4339   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4340   // true.  Otherwise, we break the simplification logic in visitREM().
4341   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4342   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4343     if (SDValue DivRem = useDivRem(N))
4344         return DivRem;
4345 
4346   return SDValue();
4347 }
4348 
4349 static bool isDivisorPowerOfTwo(SDValue Divisor) {
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements.
4352   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4353     if (C->isZero() || C->isOpaque())
4354       return false;
4355     if (C->getAPIntValue().isPowerOf2())
4356       return true;
4357     if (C->getAPIntValue().isNegatedPowerOf2())
4358       return true;
4359     return false;
4360   };
4361 
4362   return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4363 }
4364 
4365 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4366   SDLoc DL(N);
4367   EVT VT = N->getValueType(0);
4368   EVT CCVT = getSetCCResultType(VT);
4369   unsigned BitWidth = VT.getScalarSizeInBits();
4370 
4371   // fold (sdiv X, pow2) -> simple ops after legalize
4372   // FIXME: We check for the exact bit here because the generic lowering gives
4373   // better results in that case. The target-specific lowering should learn how
4374   // to handle exact sdivs efficiently.
4375   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4376     // Target-specific implementation of sdiv x, pow2.
4377     if (SDValue Res = BuildSDIVPow2(N))
4378       return Res;
4379 
4380     // Create constants that are functions of the shift amount value.
4381     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4382     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4383     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4384     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4385     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4386     if (!isConstantOrConstantVector(Inexact))
4387       return SDValue();
4388 
4389     // Splat the sign bit into the register
4390     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4391                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4392     AddToWorklist(Sign.getNode());
4393 
4394     // Add (N0 < 0) ? abs2 - 1 : 0;
4395     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4396     AddToWorklist(Srl.getNode());
4397     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4398     AddToWorklist(Add.getNode());
4399     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4400     AddToWorklist(Sra.getNode());
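    // e.g. for i32 X /s 4: Sign = X >>s 31, Srl = Sign >>u 30 (0 or 3),
    // Add = X + Srl, and Sra = Add >>s 2.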
4401 
4402     // Special case: (sdiv X, 1) -> X
    // Special case: (sdiv X, -1) -> 0-X
4404     SDValue One = DAG.getConstant(1, DL, VT);
4405     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4406     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4407     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4408     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4409     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4410 
4411     // If dividing by a positive value, we're done. Otherwise, the result must
4412     // be negated.
4413     SDValue Zero = DAG.getConstant(0, DL, VT);
4414     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4415 
4416     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4417     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4418     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4419     return Res;
4420   }
4421 
4422   // If integer divide is expensive and we satisfy the requirements, emit an
4423   // alternate sequence.  Targets may check function attributes for size/speed
4424   // trade-offs.
4425   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4426   if (isConstantOrConstantVector(N1) &&
4427       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4428     if (SDValue Op = BuildSDIV(N))
4429       return Op;
4430 
4431   return SDValue();
4432 }
4433 
4434 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4435   SDValue N0 = N->getOperand(0);
4436   SDValue N1 = N->getOperand(1);
4437   EVT VT = N->getValueType(0);
4438   EVT CCVT = getSetCCResultType(VT);
4439   SDLoc DL(N);
4440 
4441   // fold (udiv c1, c2) -> c1/c2
4442   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4443     return C;
4444 
4445   // fold vector ops
4446   if (VT.isVector())
4447     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4448       return FoldedVOp;
4449 
4450   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4451   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4452   if (N1C && N1C->isAllOnes())
4453     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4454                          DAG.getConstant(1, DL, VT),
4455                          DAG.getConstant(0, DL, VT));
4456 
4457   if (SDValue V = simplifyDivRem(N, DAG))
4458     return V;
4459 
4460   if (SDValue NewSel = foldBinOpIntoSelect(N))
4461     return NewSel;
4462 
4463   if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4466     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4467                                               { N0, N1 })) {
4468       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4469       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4470       AddToWorklist(Mul.getNode());
4471       AddToWorklist(Sub.getNode());
4472       CombineTo(RemNode, Sub);
4473     }
4474     return V;
4475   }
4476 
  // udiv, urem -> udivrem
4478   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4479   // true.  Otherwise, we break the simplification logic in visitREM().
4480   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4481   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4482     if (SDValue DivRem = useDivRem(N))
4483         return DivRem;
4484 
4485   return SDValue();
4486 }
4487 
4488 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4489   SDLoc DL(N);
4490   EVT VT = N->getValueType(0);
4491 
4492   // fold (udiv x, (1 << c)) -> x >>u c
4493   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4494       DAG.isKnownToBeAPowerOfTwo(N1)) {
4495     SDValue LogBase2 = BuildLogBase2(N1, DL);
4496     AddToWorklist(LogBase2.getNode());
4497 
4498     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4499     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4500     AddToWorklist(Trunc.getNode());
4501     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4502   }
4503 
4504   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
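  // e.g. udiv x, (shl 4, y) --> x >>u (add y, 2)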
4505   if (N1.getOpcode() == ISD::SHL) {
4506     SDValue N10 = N1.getOperand(0);
4507     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4508         DAG.isKnownToBeAPowerOfTwo(N10)) {
4509       SDValue LogBase2 = BuildLogBase2(N10, DL);
4510       AddToWorklist(LogBase2.getNode());
4511 
4512       EVT ADDVT = N1.getOperand(1).getValueType();
4513       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4514       AddToWorklist(Trunc.getNode());
4515       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4516       AddToWorklist(Add.getNode());
4517       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4518     }
4519   }
4520 
4521   // fold (udiv x, c) -> alternate
4522   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4523   if (isConstantOrConstantVector(N1) &&
4524       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4525     if (SDValue Op = BuildUDIV(N))
4526       return Op;
4527 
4528   return SDValue();
4529 }
4530 
4531 SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4532   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4533       !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4534     // Target-specific implementation of srem x, pow2.
4535     if (SDValue Res = BuildSREMPow2(N))
4536       return Res;
4537   }
4538   return SDValue();
4539 }
4540 
4541 // handles ISD::SREM and ISD::UREM
4542 SDValue DAGCombiner::visitREM(SDNode *N) {
4543   unsigned Opcode = N->getOpcode();
4544   SDValue N0 = N->getOperand(0);
4545   SDValue N1 = N->getOperand(1);
4546   EVT VT = N->getValueType(0);
4547   EVT CCVT = getSetCCResultType(VT);
4548 
4549   bool isSigned = (Opcode == ISD::SREM);
4550   SDLoc DL(N);
4551 
4552   // fold (rem c1, c2) -> c1%c2
4553   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4554   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4555     return C;
4556 
4557   // fold (urem X, -1) -> select(FX == -1, 0, FX)
4558   // Freeze the numerator to avoid a miscompile with an undefined value.
4559   if (!isSigned && N1C && N1C->isAllOnes()) {
4560     SDValue F0 = DAG.getFreeze(N0);
4561     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
4562     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
4563   }
4564 
4565   if (SDValue V = simplifyDivRem(N, DAG))
4566     return V;
4567 
4568   if (SDValue NewSel = foldBinOpIntoSelect(N))
4569     return NewSel;
4570 
4571   if (isSigned) {
4572     // If we know the sign bits of both operands are zero, strength reduce to a
4573     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4574     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4575       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4576   } else {
4577     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4578       // fold (urem x, pow2) -> (and x, pow2-1)
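      // e.g. urem x, 16 --> and x, 15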
4579       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4580       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4581       AddToWorklist(Add.getNode());
4582       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4583     }
4584     if (N1.getOpcode() == ISD::SHL &&
4585         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4586       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4587       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4588       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4589       AddToWorklist(Add.getNode());
4590       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4591     }
4592   }
4593 
4594   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4595 
4596   // If X/C can be simplified by the division-by-constant logic, lower
4597   // X%C to the equivalent of X-X/C*C.
  // Reuse the SDIVLike/UDIVLike combines; to avoid mangling nodes, the
  // speculative DIV must not cause a DIVREM conversion. We guard against this
  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
  // combine will not return a DIVREM. Regardless, checking cheapness here
  // makes sense since the simplification results in fatter code.
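  // e.g. X %u 10 --> sub X, (mul (udiv X, 10), 10), where the udiv is
  // itself optimized (see BuildUDIV).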
4603   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4604     if (isSigned) {
4605       // check if we can build faster implementation for srem
4606       if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
4607         return OptimizedRem;
4608     }
4609 
4610     SDValue OptimizedDiv =
4611         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4612     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
4613       // If the equivalent Div node also exists, update its users.
4614       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4615       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4616                                                 { N0, N1 }))
4617         CombineTo(DivNode, OptimizedDiv);
4618       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4619       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4620       AddToWorklist(OptimizedDiv.getNode());
4621       AddToWorklist(Mul.getNode());
4622       return Sub;
4623     }
4624   }
4625 
  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
4627   if (SDValue DivRem = useDivRem(N))
4628     return DivRem.getValue(1);
4629 
4630   return SDValue();
4631 }
4632 
4633 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4634   SDValue N0 = N->getOperand(0);
4635   SDValue N1 = N->getOperand(1);
4636   EVT VT = N->getValueType(0);
4637   SDLoc DL(N);
4638 
4639   // fold (mulhs c1, c2)
4640   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
4641     return C;
4642 
4643   // canonicalize constant to RHS.
4644   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4645       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4646     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
4647 
4648   if (VT.isVector()) {
4649     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4650       return FoldedVOp;
4651 
    // fold (mulhs x, 0) -> 0
    // Do not return N1, because an undef element may exist.
4654     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4655       return DAG.getConstant(0, DL, VT);
4656   }
4657 
4658   // fold (mulhs x, 0) -> 0
4659   if (isNullConstant(N1))
4660     return N1;
4661 
4662   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4663   if (isOneConstant(N1))
4664     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4665                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4666                                        getShiftAmountTy(N0.getValueType())));
4667 
4668   // fold (mulhs x, undef) -> 0
4669   if (N0.isUndef() || N1.isUndef())
4670     return DAG.getConstant(0, DL, VT);
4671 
  // If the type that is twice as wide is legal, transform the mulhs to a
  // wider multiply plus a shift.
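  // e.g. (i16 mulhs a, b) with a legal i32 MUL becomes
  // (trunc (srl (mul (sext a), (sext b)), 16)).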
4674   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4675       !VT.isVector()) {
4676     MVT Simple = VT.getSimpleVT();
4677     unsigned SimpleSize = Simple.getSizeInBits();
4678     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4679     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4680       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4681       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4682       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4683       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4684             DAG.getConstant(SimpleSize, DL,
4685                             getShiftAmountTy(N1.getValueType())));
4686       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4687     }
4688   }
4689 
4690   return SDValue();
4691 }
4692 
4693 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4694   SDValue N0 = N->getOperand(0);
4695   SDValue N1 = N->getOperand(1);
4696   EVT VT = N->getValueType(0);
4697   SDLoc DL(N);
4698 
4699   // fold (mulhu c1, c2)
4700   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
4701     return C;
4702 
4703   // canonicalize constant to RHS.
4704   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4705       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4706     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
4707 
4708   if (VT.isVector()) {
4709     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4710       return FoldedVOp;
4711 
    // fold (mulhu x, 0) -> 0
    // Do not return N1, because an undef element may exist.
4714     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4715       return DAG.getConstant(0, DL, VT);
4716   }
4717 
4718   // fold (mulhu x, 0) -> 0
4719   if (isNullConstant(N1))
4720     return N1;
4721 
4722   // fold (mulhu x, 1) -> 0
4723   if (isOneConstant(N1))
4724     return DAG.getConstant(0, DL, N0.getValueType());
4725 
4726   // fold (mulhu x, undef) -> 0
4727   if (N0.isUndef() || N1.isUndef())
4728     return DAG.getConstant(0, DL, VT);
4729 
4730   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
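  // e.g. i32 mulhu x, 4 --> x >>u 30 (the top 32 bits of the 64-bit product)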
4731   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4732       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4733     unsigned NumEltBits = VT.getScalarSizeInBits();
4734     SDValue LogBase2 = BuildLogBase2(N1, DL);
4735     SDValue SRLAmt = DAG.getNode(
4736         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4737     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4738     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4739     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4740   }
4741 
  // If the type that is twice as wide is legal, transform the mulhu to a
  // wider multiply plus a shift.
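  // e.g. (i16 mulhu a, b) with a legal i32 MUL becomes
  // (trunc (srl (mul (zext a), (zext b)), 16)).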
4744   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4745       !VT.isVector()) {
4746     MVT Simple = VT.getSimpleVT();
4747     unsigned SimpleSize = Simple.getSizeInBits();
4748     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4749     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4750       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4751       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4752       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4753       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4754             DAG.getConstant(SimpleSize, DL,
4755                             getShiftAmountTy(N1.getValueType())));
4756       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4757     }
4758   }
4759 
4760   // Simplify the operands using demanded-bits information.
4761   // We don't have demanded bits support for MULHU so this just enables constant
4762   // folding based on known bits.
4763   if (SimplifyDemandedBits(SDValue(N, 0)))
4764     return SDValue(N, 0);
4765 
4766   return SDValue();
4767 }
4768 
4769 SDValue DAGCombiner::visitAVG(SDNode *N) {
4770   unsigned Opcode = N->getOpcode();
4771   SDValue N0 = N->getOperand(0);
4772   SDValue N1 = N->getOperand(1);
4773   EVT VT = N->getValueType(0);
4774   SDLoc DL(N);
4775 
4776   // fold (avg c1, c2)
4777   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4778     return C;
4779 
4780   // canonicalize constant to RHS.
4781   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4782       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4783     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
4784 
4785   if (VT.isVector()) {
4786     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4787       return FoldedVOp;
4788 
4789     // fold (avgfloor x, 0) -> x >> 1
4790     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
4791       if (Opcode == ISD::AVGFLOORS)
4792         return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
4793       if (Opcode == ISD::AVGFLOORU)
4794         return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
4795     }
4796   }
4797 
4798   // fold (avg x, undef) -> x
4799   if (N0.isUndef())
4800     return N1;
4801   if (N1.isUndef())
4802     return N0;
4803 
  // TODO: If we use avg for scalars anywhere, we can add
  // (avgfloor x, 0) -> x >> 1.
4805 
4806   return SDValue();
4807 }
4808 
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Returns the simplified value if a simplification was made.
4812 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4813                                                 unsigned HiOp) {
4814   // If the high half is not needed, just compute the low half.
4815   bool HiExists = N->hasAnyUseOfValue(1);
4816   if (!HiExists && (!LegalOperations ||
4817                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4818     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4819     return CombineTo(N, Res, Res);
4820   }
4821 
4822   // If the low half is not needed, just compute the high half.
4823   bool LoExists = N->hasAnyUseOfValue(0);
4824   if (!LoExists && (!LegalOperations ||
4825                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4826     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4827     return CombineTo(N, Res, Res);
4828   }
4829 
  // If both halves are used, return the node as it is.
4831   if (LoExists && HiExists)
4832     return SDValue();
4833 
4834   // If the two computed results can be simplified separately, separate them.
4835   if (LoExists) {
4836     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4837     AddToWorklist(Lo.getNode());
4838     SDValue LoOpt = combine(Lo.getNode());
4839     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4840         (!LegalOperations ||
4841          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4842       return CombineTo(N, LoOpt, LoOpt);
4843   }
4844 
4845   if (HiExists) {
4846     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4847     AddToWorklist(Hi.getNode());
4848     SDValue HiOpt = combine(Hi.getNode());
4849     if (HiOpt.getNode() && HiOpt != Hi &&
4850         (!LegalOperations ||
4851          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4852       return CombineTo(N, HiOpt, HiOpt);
4853   }
4854 
4855   return SDValue();
4856 }
4857 
4858 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4859   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4860     return Res;
4861 
4862   EVT VT = N->getValueType(0);
4863   SDLoc DL(N);
4864 
  // If the type that is twice as wide is legal, transform the smul_lohi to a
  // wider multiply plus a shift.
4867   if (VT.isSimple() && !VT.isVector()) {
4868     MVT Simple = VT.getSimpleVT();
4869     unsigned SimpleSize = Simple.getSizeInBits();
4870     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4871     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4872       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4873       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4874       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4875       // Compute the high part as N1.
4876       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4877             DAG.getConstant(SimpleSize, DL,
4878                             getShiftAmountTy(Lo.getValueType())));
4879       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4880       // Compute the low part as N0.
4881       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4882       return CombineTo(N, Lo, Hi);
4883     }
4884   }
4885 
4886   return SDValue();
4887 }
4888 
4889 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4890   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4891     return Res;
4892 
4893   EVT VT = N->getValueType(0);
4894   SDLoc DL(N);
4895 
4896   // (umul_lohi N0, 0) -> (0, 0)
4897   if (isNullConstant(N->getOperand(1))) {
4898     SDValue Zero = DAG.getConstant(0, DL, VT);
4899     return CombineTo(N, Zero, Zero);
4900   }
4901 
4902   // (umul_lohi N0, 1) -> (N0, 0)
4903   if (isOneConstant(N->getOperand(1))) {
4904     SDValue Zero = DAG.getConstant(0, DL, VT);
4905     return CombineTo(N, N->getOperand(0), Zero);
4906   }
4907 
  // If the type that is twice as wide is legal, transform the umul_lohi to a
  // wider multiply plus a shift.
4910   if (VT.isSimple() && !VT.isVector()) {
4911     MVT Simple = VT.getSimpleVT();
4912     unsigned SimpleSize = Simple.getSizeInBits();
4913     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4914     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4915       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4916       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4917       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4918       // Compute the high part as N1.
4919       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4920             DAG.getConstant(SimpleSize, DL,
4921                             getShiftAmountTy(Lo.getValueType())));
4922       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4923       // Compute the low part as N0.
4924       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4925       return CombineTo(N, Lo, Hi);
4926     }
4927   }
4928 
4929   return SDValue();
4930 }
4931 
4932 SDValue DAGCombiner::visitMULO(SDNode *N) {
4933   SDValue N0 = N->getOperand(0);
4934   SDValue N1 = N->getOperand(1);
4935   EVT VT = N0.getValueType();
4936   bool IsSigned = (ISD::SMULO == N->getOpcode());
4937 
4938   EVT CarryVT = N->getValueType(1);
4939   SDLoc DL(N);
4940 
4941   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4942   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4943 
4944   // fold operation with constant operands.
4945   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4946   // multiple results.
4947   if (N0C && N1C) {
4948     bool Overflow;
4949     APInt Result =
4950         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4951                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4952     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4953                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4954   }
4955 
4956   // canonicalize constant to RHS.
4957   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4958       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4959     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4960 
4961   // fold (mulo x, 0) -> 0 + no carry out
4962   if (isNullOrNullSplat(N1))
4963     return CombineTo(N, DAG.getConstant(0, DL, VT),
4964                      DAG.getConstant(0, DL, CarryVT));
4965 
4966   // (mulo x, 2) -> (addo x, x)
4967   // FIXME: This needs a freeze.
4968   if (N1C && N1C->getAPIntValue() == 2 &&
4969       (!IsSigned || VT.getScalarSizeInBits() > 2))
4970     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4971                        N->getVTList(), N0, N0);
4972 
4973   if (IsSigned) {
4974     // A 1 bit SMULO overflows if both inputs are 1.
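    // (The only i1 signed values are 0 and -1, and -1 * -1 = +1 is not
    // representable in i1.)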
4975     if (VT.getScalarSizeInBits() == 1) {
4976       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4977       return CombineTo(N, And,
4978                        DAG.getSetCC(DL, CarryVT, And,
4979                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4980     }
4981 
4982     // Multiplying n * m significant bits yields a result of n + m significant
4983     // bits. If the total number of significant bits does not exceed the
4984     // result bit width (minus 1), there is no overflow.
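    // e.g. two i32 operands each sign-extended from i16 have at least 17 sign
    // bits apiece (17 + 17 > 33), and their product magnitude is at most 2^30,
    // which fits in i32.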
4985     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4986     if (SignBits > 1)
4987       SignBits += DAG.ComputeNumSignBits(N1);
4988     if (SignBits > VT.getScalarSizeInBits() + 1)
4989       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4990                        DAG.getConstant(0, DL, CarryVT));
4991   } else {
4992     KnownBits N1Known = DAG.computeKnownBits(N1);
4993     KnownBits N0Known = DAG.computeKnownBits(N0);
4994     bool Overflow;
4995     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4996     if (!Overflow)
4997       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4998                        DAG.getConstant(0, DL, CarryVT));
4999   }
5000 
5001   return SDValue();
5002 }
5003 
// Function to calculate whether the Min/Max pair of SDNodes (potentially
// swapped around) make a signed saturate pattern, clamping to between a signed
// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and
// 2^BW-1. Returns the node being clamped and the bitwidth of the clamp in BW.
// Should work with both SMIN/SMAX nodes and the setcc/select combo. The
// operands are the same as SimplifySelectCC: N0 < N1 ? N2 : N3.
5010 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5011                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
5012                                   bool &Unsigned) {
5013   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5014                             ISD::CondCode CC) {
5015     // The compare and select operand should be the same or the select operands
5016     // should be truncated versions of the comparison.
5017     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5018       return 0;
5019     // The constants need to be the same or a truncated version of each other.
5020     ConstantSDNode *N1C = isConstOrConstSplat(N1);
5021     ConstantSDNode *N3C = isConstOrConstSplat(N3);
5022     if (!N1C || !N3C)
5023       return 0;
5024     const APInt &C1 = N1C->getAPIntValue();
5025     const APInt &C2 = N3C->getAPIntValue();
5026     if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5027       return 0;
5028     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5029   };
5030 
5031   // Check the initial value is a SMIN/SMAX equivalent.
5032   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5033   if (!Opcode0)
5034     return SDValue();
5035 
5036   SDValue N00, N01, N02, N03;
5037   ISD::CondCode N0CC;
5038   switch (N0.getOpcode()) {
5039   case ISD::SMIN:
5040   case ISD::SMAX:
5041     N00 = N02 = N0.getOperand(0);
5042     N01 = N03 = N0.getOperand(1);
5043     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5044     break;
5045   case ISD::SELECT_CC:
5046     N00 = N0.getOperand(0);
5047     N01 = N0.getOperand(1);
5048     N02 = N0.getOperand(2);
5049     N03 = N0.getOperand(3);
5050     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5051     break;
5052   case ISD::SELECT:
5053   case ISD::VSELECT:
5054     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5055       return SDValue();
5056     N00 = N0.getOperand(0).getOperand(0);
5057     N01 = N0.getOperand(0).getOperand(1);
5058     N02 = N0.getOperand(1);
5059     N03 = N0.getOperand(2);
5060     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5061     break;
5062   default:
5063     return SDValue();
5064   }
5065 
5066   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5067   if (!Opcode1 || Opcode0 == Opcode1)
5068     return SDValue();
5069 
5070   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5071   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5072   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5073     return SDValue();
5074 
5075   const APInt &MinC = MinCOp->getAPIntValue();
5076   const APInt &MaxC = MaxCOp->getAPIntValue();
5077   APInt MinCPlus1 = MinC + 1;
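  // e.g. clamping to [-128, 127] has MinC = 127 and MaxC = -128, so
  // MinCPlus1 = 128 = 2^7 and BW = 8 (signed saturate); clamping to [0, 255]
  // has MinC = 255 and MaxC = 0, so BW = 8 (unsigned saturate).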
5078   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5079     BW = MinCPlus1.exactLogBase2() + 1;
5080     Unsigned = false;
5081     return N02;
5082   }
5083 
5084   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5085     BW = MinCPlus1.exactLogBase2();
5086     Unsigned = true;
5087     return N02;
5088   }
5089 
5090   return SDValue();
5091 }
5092 
5093 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5094                                            SDValue N3, ISD::CondCode CC,
5095                                            SelectionDAG &DAG) {
5096   unsigned BW;
5097   bool Unsigned;
5098   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
5099   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5100     return SDValue();
5101   EVT FPVT = Fp.getOperand(0).getValueType();
5102   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5103   if (FPVT.isVector())
5104     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5105                              FPVT.getVectorElementCount());
5106   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5107   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5108     return SDValue();
5109   SDLoc DL(Fp);
5110   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5111                             DAG.getValueType(NewVT.getScalarType()));
5112   return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
5113                   : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
5114 }
5115 
5116 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5117                                          SDValue N3, ISD::CondCode CC,
5118                                          SelectionDAG &DAG) {
  // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
  // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
  // be truncated versions of the setcc (N0/N1).
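  // e.g. umin (fp_to_uint X), 255 --> fp_to_uint_sat X to 8 bits, since
  // 255 + 1 = 2^8.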
5122   if ((N0 != N2 &&
5123        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5124       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5125     return SDValue();
5126   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5127   ConstantSDNode *N3C = isConstOrConstSplat(N3);
5128   if (!N1C || !N3C)
5129     return SDValue();
5130   const APInt &C1 = N1C->getAPIntValue();
5131   const APInt &C3 = N3C->getAPIntValue();
5132   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5133       C1 != C3.zext(C1.getBitWidth()))
5134     return SDValue();
5135 
5136   unsigned BW = (C1 + 1).exactLogBase2();
5137   EVT FPVT = N0.getOperand(0).getValueType();
5138   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5139   if (FPVT.isVector())
5140     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5141                              FPVT.getVectorElementCount());
5142   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5143                                                         FPVT, NewVT))
5144     return SDValue();
5145 
5146   SDValue Sat =
5147       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5148                   DAG.getValueType(NewVT.getScalarType()));
5149   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5150 }
5151 
5152 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5153   SDValue N0 = N->getOperand(0);
5154   SDValue N1 = N->getOperand(1);
5155   EVT VT = N0.getValueType();
5156   unsigned Opcode = N->getOpcode();
5157   SDLoc DL(N);
5158 
5159   // fold operation with constant operands.
5160   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5161     return C;
5162 
5163   // If the operands are the same, this is a no-op.
5164   if (N0 == N1)
5165     return N0;
5166 
5167   // canonicalize constant to RHS
5168   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5169       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5170     return DAG.getNode(Opcode, DL, VT, N1, N0);
5171 
5172   // fold vector ops
5173   if (VT.isVector())
5174     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5175       return FoldedVOp;
5176 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped one is.
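  // e.g. if both operands are zero-extended from i8, smax(X, Y) == umax(X, Y).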
5179   if (!TLI.isOperationLegal(Opcode, VT) &&
5180       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5181       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5182     unsigned AltOpcode;
5183     switch (Opcode) {
5184     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5185     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5186     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5187     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5188     default: llvm_unreachable("Unknown MINMAX opcode");
5189     }
5190     if (TLI.isOperationLegal(AltOpcode, VT))
5191       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5192   }
5193 
5194   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5195     if (SDValue S = PerformMinMaxFpToSatCombine(
5196             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5197       return S;
5198   if (Opcode == ISD::UMIN)
5199     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5200       return S;
5201 
5202   // Simplify the operands using demanded-bits information.
5203   if (SimplifyDemandedBits(SDValue(N, 0)))
5204     return SDValue(N, 0);
5205 
5206   return SDValue();
5207 }
5208 
5209 /// If this is a bitwise logic instruction and both operands have the same
5210 /// opcode, try to sink the other opcode after the logic instruction.
5211 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5212   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5213   EVT VT = N0.getValueType();
5214   unsigned LogicOpcode = N->getOpcode();
5215   unsigned HandOpcode = N0.getOpcode();
5216   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
5217           LogicOpcode == ISD::XOR) && "Expected logic opcode");
5218   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5219 
5220   // Bail early if none of these transforms apply.
5221   if (N0.getNumOperands() == 0)
5222     return SDValue();
5223 
5224   // FIXME: We should check number of uses of the operands to not increase
5225   //        the instruction count for all transforms.
5226 
5227   // Handle size-changing casts.
5228   SDValue X = N0.getOperand(0);
5229   SDValue Y = N1.getOperand(0);
5230   EVT XVT = X.getValueType();
5231   SDLoc DL(N);
5232   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
5233       HandOpcode == ISD::SIGN_EXTEND) {
5234     // If both operands have other uses, this transform would create extra
5235     // instructions without eliminating anything.
5236     if (!N0.hasOneUse() && !N1.hasOneUse())
5237       return SDValue();
5238     // We need matching integer source types.
5239     if (XVT != Y.getValueType())
5240       return SDValue();
5241     // Don't create an illegal op during or after legalization. Don't ever
5242     // create an unsupported vector op.
5243     if ((VT.isVector() || LegalOperations) &&
5244         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5245       return SDValue();
5246     // Avoid infinite looping with PromoteIntBinOp.
5247     // TODO: Should we apply desirable/legal constraints to all opcodes?
5248     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
5249         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5250       return SDValue();
5251     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
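    // e.g. (and (zext i8 a), (zext i8 b)) --> (zext (and a, b))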
5252     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5253     return DAG.getNode(HandOpcode, DL, VT, Logic);
5254   }
5255 
5256   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5257   if (HandOpcode == ISD::TRUNCATE) {
5258     // If both operands have other uses, this transform would create extra
5259     // instructions without eliminating anything.
5260     if (!N0.hasOneUse() && !N1.hasOneUse())
5261       return SDValue();
5262     // We need matching source types.
5263     if (XVT != Y.getValueType())
5264       return SDValue();
5265     // Don't create an illegal op during or after legalization.
5266     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5267       return SDValue();
5268     // Be extra careful sinking truncate. If it's free, there's no benefit in
5269     // widening a binop. Also, don't create a logic op on an illegal type.
5270     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5271       return SDValue();
5272     if (!TLI.isTypeLegal(XVT))
5273       return SDValue();
5274     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5275     return DAG.getNode(HandOpcode, DL, VT, Logic);
5276   }
5277 
5278   // For binops SHL/SRL/SRA/AND:
5279   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5280   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5281        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5282       N0.getOperand(1) == N1.getOperand(1)) {
5283     // If either operand has other uses, this transform is not an improvement.
5284     if (!N0.hasOneUse() || !N1.hasOneUse())
5285       return SDValue();
5286     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5287     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5288   }
5289 
5290   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5291   if (HandOpcode == ISD::BSWAP) {
5292     // If either operand has other uses, this transform is not an improvement.
5293     if (!N0.hasOneUse() || !N1.hasOneUse())
5294       return SDValue();
5295     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5296     return DAG.getNode(HandOpcode, DL, VT, Logic);
5297   }
5298 
5299   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
5304   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5305   // on scalars.
5306   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5307        Level <= AfterLegalizeTypes) {
5308     // Input types must be integer and the same.
5309     if (XVT.isInteger() && XVT == Y.getValueType() &&
5310         !(VT.isVector() && TLI.isTypeLegal(VT) &&
5311           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5312       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5313       return DAG.getNode(HandOpcode, DL, VT, Logic);
5314     }
5315   }
5316 
5317   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5318   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5319   // If both shuffles use the same mask, and both shuffle within a single
5320   // vector, then it is worthwhile to move the swizzle after the operation.
5321   // The type-legalizer generates this pattern when loading illegal
5322   // vector types from memory. In many cases this allows additional shuffle
5323   // optimizations.
5324   // There are other cases where moving the shuffle after the xor/and/or
5325   // is profitable even if shuffles don't perform a swizzle.
5326   // If both shuffles use the same mask, and both shuffles have the same first
5327   // or second operand, then it might still be profitable to move the shuffle
5328   // after the xor/and/or operation.
5329   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5330     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5331     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5332     assert(X.getValueType() == Y.getValueType() &&
5333            "Inputs to shuffles are not the same type");
5334 
5335     // Check that both shuffles use the same mask. The masks are known to be of
5336     // the same length because the result vector type is the same.
5337     // Check also that shuffles have only one use to avoid introducing extra
5338     // instructions.
5339     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5340         !SVN0->getMask().equals(SVN1->getMask()))
5341       return SDValue();
5342 
5343     // Don't try to fold this node if it requires introducing a
5344     // build vector of all zeros that might be illegal at this stage.
5345     SDValue ShOp = N0.getOperand(1);
5346     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5347       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5348 
5349     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5350     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5351       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5352                                   N0.getOperand(0), N1.getOperand(0));
5353       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5354     }
5355 
5356     // Don't try to fold this node if it requires introducing a
5357     // build vector of all zeros that might be illegal at this stage.
5358     ShOp = N0.getOperand(0);
5359     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5360       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5361 
5362     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5363     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5364       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5365                                   N1.getOperand(1));
5366       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5367     }
5368   }
5369 
5370   return SDValue();
5371 }
5372 
5373 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5374 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5375                                        const SDLoc &DL) {
5376   SDValue LL, LR, RL, RR, N0CC, N1CC;
5377   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5378       !isSetCCEquivalent(N1, RL, RR, N1CC))
5379     return SDValue();
5380 
5381   assert(N0.getValueType() == N1.getValueType() &&
5382          "Unexpected operand types for bitwise logic op");
5383   assert(LL.getValueType() == LR.getValueType() &&
5384          RL.getValueType() == RR.getValueType() &&
5385          "Unexpected operand types for setcc");
5386 
5387   // If we're here post-legalization or the logic op type is not i1, the logic
5388   // op type must match a setcc result type. Also, all folds require new
5389   // operations on the left and right operands, so those types must match.
5390   EVT VT = N0.getValueType();
5391   EVT OpVT = LL.getValueType();
5392   if (LegalOperations || VT.getScalarType() != MVT::i1)
5393     if (VT != getSetCCResultType(OpVT))
5394       return SDValue();
5395   if (OpVT != RL.getValueType())
5396     return SDValue();
5397 
5398   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5399   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5400   bool IsInteger = OpVT.isInteger();
5401   if (LR == RR && CC0 == CC1 && IsInteger) {
5402     bool IsZero = isNullOrNullSplat(LR);
5403     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5404 
5405     // All bits clear?
5406     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5407     // All sign bits clear?
5408     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5409     // Any bits set?
5410     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5411     // Any sign bits set?
5412     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5413 
5414     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5415     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5416     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5417     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5418     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5419       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5420       AddToWorklist(Or.getNode());
5421       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5422     }
5423 
5424     // All bits set?
5425     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5426     // All sign bits set?
5427     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5428     // Any bits clear?
5429     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5430     // Any sign bits clear?
5431     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5432 
5433     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5434     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5435     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5437     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5438       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5439       AddToWorklist(And.getNode());
5440       return DAG.getSetCC(DL, VT, And, LR, CC1);
5441     }
5442   }
5443 
5444   // TODO: What is the 'or' equivalent of this fold?
5445   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
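  // (Illustrative: for i8 X, (add X, 1) maps 0 -> 1 and -1 -> 0, so the
  // unsigned comparison (add X, 1) >= 2 holds exactly when X is neither 0
  // nor -1.)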
5446   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5447       IsInteger && CC0 == ISD::SETNE &&
5448       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5449        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5450     SDValue One = DAG.getConstant(1, DL, OpVT);
5451     SDValue Two = DAG.getConstant(2, DL, OpVT);
5452     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5453     AddToWorklist(Add.getNode());
5454     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5455   }
5456 
5457   // Try more general transforms if the predicates match and the only user of
5458   // the compares is the 'and' or 'or'.
5459   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5460       N0.hasOneUse() && N1.hasOneUse()) {
5461     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5462     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5463     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5464       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5465       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5466       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5467       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5468       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5469     }
5470 
5471     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
5472     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5473       // Match a shared variable operand and 2 non-opaque constant operands.
5474       auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
5475         // The difference of the constants must be a single bit.
5476         const APInt &CMax =
5477             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5478         const APInt &CMin =
5479             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5480         return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
5481       };
5482       if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
        // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
        // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
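        // E.g. (illustrative): X != 5 && X != 7 has CMin = 5, CMax = 7, and
        // CMax - CMin = 2 (a single bit), giving ((X - 5) & ~2) != 0.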
5485         SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5486         SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5487         SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5488         SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5489         SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5490         SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5491         SDValue Zero = DAG.getConstant(0, DL, OpVT);
5492         return DAG.getSetCC(DL, VT, And, Zero, CC0);
5493       }
5494     }
5495   }
5496 
5497   // Canonicalize equivalent operands to LL == RL.
5498   if (LL == RR && LR == RL) {
5499     CC1 = ISD::getSetCCSwappedOperands(CC1);
5500     std::swap(RL, RR);
5501   }
5502 
5503   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5504   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5505   if (LL == RL && LR == RR) {
5506     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5507                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5508     if (NewCC != ISD::SETCC_INVALID &&
5509         (!LegalOperations ||
5510          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5511           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5512       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5513   }
5514 
5515   return SDValue();
5516 }
5517 
5518 /// This contains all DAGCombine rules which reduce two values combined by
5519 /// an And operation to a single value. This makes them reusable in the context
5520 /// of visitSELECT(). Rules involving constants are not included as
5521 /// visitSELECT() already handles those cases.
5522 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5523   EVT VT = N1.getValueType();
5524   SDLoc DL(N);
5525 
5526   // fold (and x, undef) -> 0
5527   if (N0.isUndef() || N1.isUndef())
5528     return DAG.getConstant(0, DL, VT);
5529 
5530   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5531     return V;
5532 
5533   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
5534   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5535       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
5536     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5537       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
        // immediate for an add, but becomes legal once its top c2 bits are
        // set, transform the ADD so the immediate doesn't need to be
        // materialized in a register.
5542         APInt ADDC = ADDI->getAPIntValue();
5543         APInt SRLC = SRLI->getAPIntValue();
5544         if (ADDC.getMinSignedBits() <= 64 &&
5545             SRLC.ult(VT.getSizeInBits()) &&
5546             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5547           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5548                                              SRLC.getZExtValue());
5549           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5550             ADDC |= Mask;
5551             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5552               SDLoc DL0(N0);
5553               SDValue NewAdd =
5554                 DAG.getNode(ISD::ADD, DL0, VT,
5555                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5556               CombineTo(N0.getNode(), NewAdd);
5557               // Return N so it doesn't get rechecked!
5558               return SDValue(N, 0);
5559             }
5560           }
5561         }
5562       }
5563     }
5564   }
5565 
  // Reduce a bit extract of the low half of an integer to the narrower type.
  // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
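  // E.g. (illustrative): (and (srl i64:x, 8), 0xff) only needs bits 8..15 of
  // x, so it can be computed as (zext (and (srl (trunc x to i32), 8), 0xff)).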
5569   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5570     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5571       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5572         unsigned Size = VT.getSizeInBits();
5573         const APInt &AndMask = CAnd->getAPIntValue();
5574         unsigned ShiftBits = CShift->getZExtValue();
5575 
5576         // Bail out, this node will probably disappear anyway.
5577         if (ShiftBits == 0)
5578           return SDValue();
5579 
5580         unsigned MaskBits = AndMask.countTrailingOnes();
5581         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5582 
5583         if (AndMask.isMask() &&
5584             // Required bits must not span the two halves of the integer and
5585             // must fit in the half size type.
5586             (ShiftBits + MaskBits <= Size / 2) &&
5587             TLI.isNarrowingProfitable(VT, HalfVT) &&
5588             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5589             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5590             TLI.isTruncateFree(VT, HalfVT) &&
5591             TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable check is to avoid regressions on PPC and
          // AArch64, which match a few 64-bit bit-insert / bit-extract
          // patterns on downstream users of this node. Those patterns could
          // probably be extended to handle extensions mixed in.
5596 
5597           SDValue SL(N0);
5598           assert(MaskBits <= Size);
5599 
5600           // Extracting the highest bit of the low half.
5601           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5602           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5603                                       N0.getOperand(0));
5604 
5605           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5606           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5607           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5608           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5609           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5610         }
5611       }
5612     }
5613   }
5614 
5615   return SDValue();
5616 }
5617 
5618 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5619                                    EVT LoadResultTy, EVT &ExtVT) {
5620   if (!AndC->getAPIntValue().isMask())
5621     return false;
5622 
5623   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5624 
5625   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5626   EVT LoadedVT = LoadN->getMemoryVT();
5627 
5628   if (ExtVT == LoadedVT &&
5629       (!LegalOperations ||
5630        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5631     // ZEXTLOAD will match without needing to change the size of the value being
5632     // loaded.
5633     return true;
5634   }
5635 
  // Do not change the width of volatile or atomic loads.
5637   if (!LoadN->isSimple())
5638     return false;
5639 
5640   // Do not generate loads of non-round integer types since these can
5641   // be expensive (and would be wrong if the type is not byte sized).
5642   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5643     return false;
5644 
5645   if (LegalOperations &&
5646       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5647     return false;
5648 
5649   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5650     return false;
5651 
5652   return true;
5653 }
5654 
5655 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5656                                     ISD::LoadExtType ExtType, EVT &MemVT,
5657                                     unsigned ShAmt) {
5658   if (!LDST)
5659     return false;
5660   // Only allow byte offsets.
5661   if (ShAmt % 8)
5662     return false;
5663 
5664   // Do not generate loads of non-round integer types since these can
5665   // be expensive (and would be wrong if the type is not byte sized).
5666   if (!MemVT.isRound())
5667     return false;
5668 
  // Don't change the width of volatile or atomic loads.
5670   if (!LDST->isSimple())
5671     return false;
5672 
5673   EVT LdStMemVT = LDST->getMemoryVT();
5674 
5675   // Bail out when changing the scalable property, since we can't be sure that
5676   // we're actually narrowing here.
5677   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5678     return false;
5679 
5680   // Verify that we are actually reducing a load width here.
5681   if (LdStMemVT.bitsLT(MemVT))
5682     return false;
5683 
5684   // Ensure that this isn't going to produce an unsupported memory access.
5685   if (ShAmt) {
5686     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5687     const unsigned ByteShAmt = ShAmt / 8;
5688     const Align LDSTAlign = LDST->getAlign();
5689     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5690     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5691                                 LDST->getAddressSpace(), NarrowAlign,
5692                                 LDST->getMemOperand()->getFlags()))
5693       return false;
5694   }
5695 
5696   // It's not possible to generate a constant of extended or untyped type.
5697   EVT PtrType = LDST->getBasePtr().getValueType();
5698   if (PtrType == MVT::Untyped || PtrType.isExtended())
5699     return false;
5700 
5701   if (isa<LoadSDNode>(LDST)) {
5702     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5703     // Don't transform one with multiple uses, this would require adding a new
5704     // load.
5705     if (!SDValue(Load, 0).hasOneUse())
5706       return false;
5707 
5708     if (LegalOperations &&
5709         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5710       return false;
5711 
5712     // For the transform to be legal, the load must produce only two values
5713     // (the value loaded and the chain).  Don't transform a pre-increment
5714     // load, for example, which produces an extra value.  Otherwise the
5715     // transformation is not equivalent, and the downstream logic to replace
5716     // uses gets things wrong.
5717     if (Load->getNumValues() > 2)
5718       return false;
5719 
5720     // If the load that we're shrinking is an extload and we're not just
5721     // discarding the extension we can't simply shrink the load. Bail.
5722     // TODO: It would be possible to merge the extensions in some cases.
5723     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5724         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5725       return false;
5726 
5727     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5728       return false;
5729   } else {
5730     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
5731     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5732     // Can't write outside the original store
5733     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5734       return false;
5735 
5736     if (LegalOperations &&
5737         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5738       return false;
5739   }
5740   return true;
5741 }
5742 
5743 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5744                                     SmallVectorImpl<LoadSDNode*> &Loads,
5745                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5746                                     ConstantSDNode *Mask,
5747                                     SDNode *&NodeToMask) {
5748   // Recursively search for the operands, looking for loads which can be
5749   // narrowed.
5750   for (SDValue Op : N->op_values()) {
5751     if (Op.getValueType().isVector())
5752       return false;
5753 
5754     // Some constants may need fixing up later if they are too large.
5755     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5756       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5757           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5758         NodesWithConsts.insert(N);
5759       continue;
5760     }
5761 
5762     if (!Op.hasOneUse())
5763       return false;
5764 
5765     switch(Op.getOpcode()) {
5766     case ISD::LOAD: {
5767       auto *Load = cast<LoadSDNode>(Op);
5768       EVT ExtVT;
5769       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5770           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5771 
5772         // ZEXTLOAD is already small enough.
5773         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5774             ExtVT.bitsGE(Load->getMemoryVT()))
5775           continue;
5776 
5777         // Use LE to convert equal sized loads to zext.
5778         if (ExtVT.bitsLE(Load->getMemoryVT()))
5779           Loads.push_back(Load);
5780 
5781         continue;
5782       }
5783       return false;
5784     }
5785     case ISD::ZERO_EXTEND:
5786     case ISD::AssertZext: {
5787       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5788       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5789       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5790         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5791         Op.getOperand(0).getValueType();
5792 
      // We can accept extending nodes if the mask is wider than or equal in
      // width to the original type.
5795       if (ExtVT.bitsGE(VT))
5796         continue;
5797       break;
5798     }
5799     case ISD::OR:
5800     case ISD::XOR:
5801     case ISD::AND:
5802       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5803                              NodeToMask))
5804         return false;
5805       continue;
5806     }
5807 
    // Allow one node which will be masked along with any loads found.
5809     if (NodeToMask)
5810       return false;
5811 
5812     // Also ensure that the node to be masked only produces one data result.
5813     NodeToMask = Op.getNode();
5814     if (NodeToMask->getNumValues() > 1) {
5815       bool HasValue = false;
5816       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5817         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5818         if (VT != MVT::Glue && VT != MVT::Other) {
5819           if (HasValue) {
5820             NodeToMask = nullptr;
5821             return false;
5822           }
5823           HasValue = true;
5824         }
5825       }
5826       assert(HasValue && "Node to be masked has no data result?");
5827     }
5828   }
5829   return true;
5830 }
5831 
5832 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5833   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5834   if (!Mask)
5835     return false;
5836 
5837   if (!Mask->getAPIntValue().isMask())
5838     return false;
5839 
5840   // No need to do anything if the and directly uses a load.
5841   if (isa<LoadSDNode>(N->getOperand(0)))
5842     return false;
5843 
5844   SmallVector<LoadSDNode*, 8> Loads;
5845   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5846   SDNode *FixupNode = nullptr;
5847   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
    if (Loads.empty())
5849       return false;
5850 
5851     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5852     SDValue MaskOp = N->getOperand(1);
5853 
5854     // If it exists, fixup the single node we allow in the tree that needs
5855     // masking.
5856     if (FixupNode) {
5857       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5858       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5859                                 FixupNode->getValueType(0),
5860                                 SDValue(FixupNode, 0), MaskOp);
5861       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
5863         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5864     }
5865 
5866     // Narrow any constants that need it.
5867     for (auto *LogicN : NodesWithConsts) {
5868       SDValue Op0 = LogicN->getOperand(0);
5869       SDValue Op1 = LogicN->getOperand(1);
5870 
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);
5873 
5874       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5875                                 Op1, MaskOp);
5876 
5877       DAG.UpdateNodeOperands(LogicN, Op0, And);
5878     }
5879 
5880     // Create narrow loads.
5881     for (auto *Load : Loads) {
5882       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5883       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5884                                 SDValue(Load, 0), MaskOp);
5885       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
5887         And = SDValue(
5888             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5889       SDValue NewLoad = reduceLoadWidth(And.getNode());
5890       assert(NewLoad &&
5891              "Shouldn't be masking the load if it can't be narrowed");
5892       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5893     }
5894     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5895     return true;
5896   }
5897   return false;
5898 }
5899 
5900 // Unfold
5901 //    x &  (-1 'logical shift' y)
5902 // To
5903 //    (x 'opposite logical shift' y) 'logical shift' y
5904 // if it is better for performance.
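// E.g. (illustrative): x & (-1 << y) becomes (x >> y) << y, and
// x & (-1 >> y) becomes (x << y) >> y; both clear the same extreme bits
// without materializing the shifted all-ones mask in a register.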
5905 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "Expected an 'and' op");
5907 
5908   SDValue N0 = N->getOperand(0);
5909   SDValue N1 = N->getOperand(1);
5910 
  // Do we actually prefer shifts over a mask?
5912   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5913     return SDValue();
5914 
5915   // Try to match  (-1 '[outer] logical shift' y)
5916   unsigned OuterShift;
5917   unsigned InnerShift; // The opposite direction to the OuterShift.
5918   SDValue Y;           // Shift amount.
5919   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5920     if (!M.hasOneUse())
5921       return false;
5922     OuterShift = M->getOpcode();
5923     if (OuterShift == ISD::SHL)
5924       InnerShift = ISD::SRL;
5925     else if (OuterShift == ISD::SRL)
5926       InnerShift = ISD::SHL;
5927     else
5928       return false;
5929     if (!isAllOnesConstant(M->getOperand(0)))
5930       return false;
5931     Y = M->getOperand(1);
5932     return true;
5933   };
5934 
5935   SDValue X;
5936   if (matchMask(N1))
5937     X = N0;
5938   else if (matchMask(N0))
5939     X = N1;
5940   else
5941     return SDValue();
5942 
5943   SDLoc DL(N);
5944   EVT VT = N->getValueType(0);
5945 
5946   //     tmp = x   'opposite logical shift' y
5947   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5948   //     ret = tmp 'logical shift' y
5949   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5950 
5951   return T1;
5952 }
5953 
5954 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5955 /// For a target with a bit test, this is expected to become test + set and save
5956 /// at least 1 instruction.
5957 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5958   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5959 
5960   // This is probably not worthwhile without a supported type.
5961   EVT VT = And->getValueType(0);
5962   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5963   if (!TLI.isTypeLegal(VT))
5964     return SDValue();
5965 
5966   // Look through an optional extension and find a 'not'.
5967   // TODO: Should we favor test+set even without the 'not' op?
5968   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5969   if (Not.getOpcode() == ISD::ANY_EXTEND)
5970     Not = Not.getOperand(0);
5971   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5972     return SDValue();
5973 
  // Look through an optional truncation. The source operand may not be the same
5975   // type as the original 'and', but that is ok because we are masking off
5976   // everything but the low bit.
5977   SDValue Srl = Not.getOperand(0);
5978   if (Srl.getOpcode() == ISD::TRUNCATE)
5979     Srl = Srl.getOperand(0);
5980 
5981   // Match a shift-right by constant.
5982   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5983       !isa<ConstantSDNode>(Srl.getOperand(1)))
5984     return SDValue();
5985 
5986   // We might have looked through casts that make this transform invalid.
5987   // TODO: If the source type is wider than the result type, do the mask and
5988   //       compare in the source type.
5989   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5990   unsigned VTBitWidth = VT.getSizeInBits();
5991   if (ShiftAmt.uge(VTBitWidth))
5992     return SDValue();
5993 
5994   if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1)))
5995     return SDValue();
5996 
5997   // Turn this into a bit-test pattern using mask op + setcc:
5998   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
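  // (Illustrative: with i32 X and C = 3, this tests bit 3; the result becomes
  // zext((X & 8) == 0).)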
5999   SDLoc DL(And);
6000   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
6001   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6002   SDValue Mask = DAG.getConstant(
6003       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
6004   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
6005   SDValue Zero = DAG.getConstant(0, DL, VT);
6006   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6007   return DAG.getZExtOrTrunc(Setcc, DL, VT);
6008 }
6009 
6010 /// For targets that support usubsat, match a bit-hack form of that operation
6011 /// that ends in 'and' and convert it.
6012 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
6013   SDValue N0 = N->getOperand(0);
6014   SDValue N1 = N->getOperand(1);
6015   EVT VT = N1.getValueType();
6016 
6017   // Canonicalize SRA as operand 1.
6018   if (N0.getOpcode() == ISD::SRA)
6019     std::swap(N0, N1);
6020 
6021   // xor/add with SMIN (signmask) are logically equivalent.
6022   if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
6023     return SDValue();
6024 
6025   if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
6026       N0.getOperand(0) != N1.getOperand(0))
6027     return SDValue();
6028 
6029   unsigned BitWidth = VT.getScalarSizeInBits();
6030   ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6031   ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6032   if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6033       !SraC || SraC->getAPIntValue() != BitWidth - 1)
6034     return SDValue();
6035 
6036   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6037   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
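  // (Illustrative: for i8 X = 130, X ^ 128 = 2 and X s>> 7 = -1, so the 'and'
  // yields 2 == usubsat(130, 128); for X = 5, X s>> 7 = 0, so the result is
  // 0 == usubsat(5, 128).)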
6038   SDLoc DL(N);
6039   SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6040   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6041 }
6042 
6043 /// Given a bitwise logic operation N with a matching bitwise logic operand,
6044 /// fold a pattern where 2 of the source operands are identically shifted
6045 /// values. For example:
6046 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
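/// This trades two shifts for one: X0 and X1 are combined first, and the
/// single remaining shift by Y is applied to the combined value.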
6047 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6048                                  SelectionDAG &DAG) {
6049   unsigned LogicOpcode = N->getOpcode();
  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
          LogicOpcode == ISD::XOR) &&
         "Expected bitwise logic operation");
6053 
6054   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6055     return SDValue();
6056 
6057   // Match another bitwise logic op and a shift.
6058   unsigned ShiftOpcode = ShiftOp.getOpcode();
6059   if (LogicOp.getOpcode() != LogicOpcode ||
6060       !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6061         ShiftOpcode == ISD::SRA))
6062     return SDValue();
6063 
6064   // Match another shift op inside the first logic operand. Handle both commuted
6065   // possibilities.
6066   // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6067   // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6068   SDValue X1 = ShiftOp.getOperand(0);
6069   SDValue Y = ShiftOp.getOperand(1);
6070   SDValue X0, Z;
6071   if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6072       LogicOp.getOperand(0).getOperand(1) == Y) {
6073     X0 = LogicOp.getOperand(0).getOperand(0);
6074     Z = LogicOp.getOperand(1);
6075   } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6076              LogicOp.getOperand(1).getOperand(1) == Y) {
6077     X0 = LogicOp.getOperand(1).getOperand(0);
6078     Z = LogicOp.getOperand(0);
6079   } else {
6080     return SDValue();
6081   }
6082 
6083   EVT VT = N->getValueType(0);
6084   SDLoc DL(N);
6085   SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6086   SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6087   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6088 }
6089 
6090 SDValue DAGCombiner::visitAND(SDNode *N) {
6091   SDValue N0 = N->getOperand(0);
6092   SDValue N1 = N->getOperand(1);
6093   EVT VT = N1.getValueType();
6094 
6095   // x & x --> x
6096   if (N0 == N1)
6097     return N0;
6098 
6099   // fold (and c1, c2) -> c1&c2
6100   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6101     return C;
6102 
6103   // canonicalize constant to RHS
6104   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6105       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6106     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6107 
6108   // fold vector ops
6109   if (VT.isVector()) {
6110     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6111       return FoldedVOp;
6112 
6113     // fold (and x, 0) -> 0, vector edition
6114     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      // Do not return N1, because an undef node may exist in N1.
6116       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6117                              SDLoc(N), N1.getValueType());
6118 
6119     // fold (and x, -1) -> x, vector edition
6120     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6121       return N0;
6122 
6123     // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6124     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6125     ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6126     if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() &&
6127         Splat && N1.hasOneUse()) {
6128       EVT LoadVT = MLoad->getMemoryVT();
6129       EVT ExtVT = VT;
6130       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
        // For this AND to be a zero extension of the masked load, the elements
        // of the BuildVec must mask the bottom bits of the extended element
        // type.
6134         uint64_t ElementSize =
6135             LoadVT.getVectorElementType().getScalarSizeInBits();
6136         if (Splat->getAPIntValue().isMask(ElementSize)) {
6137           return DAG.getMaskedLoad(
6138               ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6139               MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6140               LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6141               ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6142         }
6143       }
6144     }
6145   }
6146 
6147   // fold (and x, -1) -> x
6148   if (isAllOnesConstant(N1))
6149     return N0;
6150 
6151   // if (and x, c) is known to be zero, return 0
6152   unsigned BitWidth = VT.getScalarSizeInBits();
6153   ConstantSDNode *N1C = isConstOrConstSplat(N1);
6154   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
6155     return DAG.getConstant(0, SDLoc(N), VT);
6156 
6157   if (SDValue NewSel = foldBinOpIntoSelect(N))
6158     return NewSel;
6159 
6160   // reassociate and
6161   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
6162     return RAND;
6163 
6164   // Try to convert a constant mask AND into a shuffle clear mask.
6165   if (VT.isVector())
6166     if (SDValue Shuffle = XformToShuffleWithZero(N))
6167       return Shuffle;
6168 
6169   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
6170     return Combined;
6171 
6172   // fold (and (or x, C), D) -> D if (C & D) == D
6173   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
6174     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
6175   };
6176   if (N0.getOpcode() == ISD::OR &&
6177       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
6178     return N1;
6179   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
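  // E.g. (illustrative): (and (i32 any_extend i8:V), 0xff) keeps only bits
  // that zero_extend would define anyway, so it becomes (i32 zero_extend V).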
6180   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
6181     SDValue N0Op0 = N0.getOperand(0);
6182     APInt Mask = ~N1C->getAPIntValue();
6183     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
6184     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
6185       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
6186                                  N0.getValueType(), N0Op0);
6187 
6188       // Replace uses of the AND with uses of the Zero extend node.
6189       CombineTo(N, Zext);
6190 
6191       // We actually want to replace all uses of the any_extend with the
6192       // zero_extend, to avoid duplicating things.  This will later cause this
6193       // AND to be folded.
6194       CombineTo(N0.getNode(), Zext);
6195       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
6196     }
6197   }
6198 
  // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // The 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
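  // E.g. (illustrative): (and (i32 zextload i8 x), 0xff) is redundant because
  // the zextload already guarantees that the upper 24 bits are zero.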
6205   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6206        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
6207        N0.getOperand(0).getOpcode() == ISD::LOAD &&
6208        N0.getOperand(0).getResNo() == 0) ||
6209       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
6210     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
6211                                          N0 : N0.getOperand(0) );
6212 
6213     // Get the constant (if applicable) the zero'th operand is being ANDed with.
6214     // This can be a pure constant or a vector splat, in which case we treat the
6215     // vector as a scalar and use the splat value.
6216     APInt Constant = APInt::getZero(1);
6217     if (const ConstantSDNode *C = isConstOrConstSplat(N1)) {
6218       Constant = C->getAPIntValue();
6219     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
6220       APInt SplatValue, SplatUndef;
6221       unsigned SplatBitSize;
6222       bool HasAnyUndefs;
6223       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
6224                                              SplatBitSize, HasAnyUndefs);
6225       if (IsSplat) {
6226         // Undef bits can contribute to a possible optimisation if set, so
6227         // set them.
6228         SplatValue |= SplatUndef;
6229 
6230         // The splat value may be something like "0x00FFFFFF", which means 0 for
6231         // the first vector value and FF for the rest, repeating. We need a mask
6232         // that will apply equally to all members of the vector, so AND all the
6233         // lanes of the constant together.
6234         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
6235 
6236         // If the splat value has been compressed to a bitlength lower
6237         // than the size of the vector lane, we need to re-expand it to
6238         // the lane size.
6239         if (EltBitWidth > SplatBitSize)
6240           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
6241                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
6242             SplatValue |= SplatValue.shl(SplatBitSize);
6243 
        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is
        // a multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
        // value.
6246         if ((SplatBitSize % EltBitWidth) == 0) {
6247           Constant = APInt::getAllOnes(EltBitWidth);
6248           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
6249             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
6250         }
6251       }
6252     }
6253 
6254     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
6255     // actually legal and isn't going to get expanded, else this is a false
6256     // optimisation.
6257     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
6258                                                     Load->getValueType(0),
6259                                                     Load->getMemoryVT());
6260 
6261     // Resize the constant to the same size as the original memory access before
6262     // extension. If it is still the AllOnesValue then this AND is completely
6263     // unneeded.
6264     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
6265 
6266     bool B;
6267     switch (Load->getExtensionType()) {
6268     default: B = false; break;
6269     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
6270     case ISD::ZEXTLOAD:
6271     case ISD::NON_EXTLOAD: B = true; break;
6272     }
6273 
6274     if (B && Constant.isAllOnes()) {
6275       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
6276       // preserve semantics once we get rid of the AND.
6277       SDValue NewLoad(Load, 0);
6278 
6279       // Fold the AND away. NewLoad may get replaced immediately.
6280       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
6281 
6282       if (Load->getExtensionType() == ISD::EXTLOAD) {
6283         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
6284                               Load->getValueType(0), SDLoc(Load),
6285                               Load->getChain(), Load->getBasePtr(),
6286                               Load->getOffset(), Load->getMemoryVT(),
6287                               Load->getMemOperand());
6288         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
6289         if (Load->getNumValues() == 3) {
6290           // PRE/POST_INC loads have 3 values.
6291           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
6292                            NewLoad.getValue(2) };
6293           CombineTo(Load, To, 3, true);
6294         } else {
6295           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
6296         }
6297       }
6298 
6299       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6300     }
6301   }
6302 
6303   // fold (and (masked_gather x)) -> (zext_masked_gather x)
6304   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
6305     EVT MemVT = GN0->getMemoryVT();
6306     EVT ScalarVT = MemVT.getScalarType();
6307 
6308     if (SDValue(GN0, 0).hasOneUse() &&
6309         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
6311       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
6312                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
6313 
6314       SDValue ZExtLoad = DAG.getMaskedGather(
6315           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
6316           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
6317 
6318       CombineTo(N, ZExtLoad);
6319       AddToWorklist(ZExtLoad.getNode());
6320       // Avoid recheck of N.
6321       return SDValue(N, 0);
6322     }
6323   }
6324 
6325   // fold (and (load x), 255) -> (zextload x, i8)
6326   // fold (and (extload x, i16), 255) -> (zextload x, i8)
6327   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
6328   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
6329                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
6330                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
6331     if (SDValue Res = reduceLoadWidth(N)) {
6332       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
6333         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
6334       AddToWorklist(N);
6335       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
6336       return SDValue(N, 0);
6337     }
6338   }
6339 
6340   if (LegalTypes) {
6341     // Attempt to propagate the AND back up to the leaves which, if they're
6342     // loads, can be combined to narrow loads and the AND node can be removed.
6343     // Perform after legalization so that extend nodes will already be
6344     // combined into the loads.
6345     if (BackwardsPropagateMask(N))
6346       return SDValue(N, 0);
6347   }
6348 
6349   if (SDValue Combined = visitANDLike(N0, N1, N))
6350     return Combined;
6351 
6352   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
6353   if (N0.getOpcode() == N1.getOpcode())
6354     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6355       return V;
6356 
6357   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
6358     return R;
6359   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
6360     return R;
6361 
6362   // Masking the negated extension of a boolean is just the zero-extended
6363   // boolean:
6364   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
6365   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
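  // (Illustrative: for X = true, (sub 0, zext X) = -1 and -1 & 1 = 1, which
  // is zext(X); for X = false both sides are 0.)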
6366   //
6367   // Note: the SimplifyDemandedBits fold below can make an information-losing
6368   // transform, and then we have no way to find this better fold.
6369   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
6370     if (isNullOrNullSplat(N0.getOperand(0))) {
6371       SDValue SubRHS = N0.getOperand(1);
6372       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
6373           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6374         return SubRHS;
6375       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
6376           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
6377         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
6378     }
6379   }
6380 
6381   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
6382   // fold (and (sra)) -> (and (srl)) when possible.
6383   if (SimplifyDemandedBits(SDValue(N, 0)))
6384     return SDValue(N, 0);
6385 
6386   // fold (zext_inreg (extload x)) -> (zextload x)
6387   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
6388   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
6389       (ISD::isEXTLoad(N0.getNode()) ||
6390        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
6391     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6392     EVT MemVT = LN0->getMemoryVT();
6393     // If we zero all the possible extended bits, then we can turn this into
6394     // a zextload if we are running before legalize or the operation is legal.
6395     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
6396     unsigned MemBitSize = MemVT.getScalarSizeInBits();
6397     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
6398     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
6399         ((!LegalOperations && LN0->isSimple()) ||
6400          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
6401       SDValue ExtLoad =
6402           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
6403                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
6404       AddToWorklist(N);
6405       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6406       return SDValue(N, 0); // Return N so it doesn't get rechecked!
6407     }
6408   }
6409 
6410   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
6411   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
6412     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6413                                            N0.getOperand(1), false))
6414       return BSwap;
6415   }
6416 
6417   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
6418     return Shifts;
6419 
6420   if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
6421     return V;
6422 
  // Recognize the following pattern:
  //
  // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
  //
  // where bitmask keeps exactly the low bits of NarrowVT, i.e. it clears
  // all AndVT bits at and above the width of NarrowVT.
6429   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
6430     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
6431       return false;
6432 
6433     auto *C = dyn_cast<ConstantSDNode>(RHS);
6434     if (!C)
6435       return false;
6436 
6437     if (!C->getAPIntValue().isMask(
6438             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
6439       return false;
6440 
6441     return true;
6442   };
6443 
6444   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
6445   if (IsAndZeroExtMask(N0, N1))
6446     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
6447 
6448   if (hasOperation(ISD::USUBSAT, VT))
6449     if (SDValue V = foldAndToUsubsat(N, DAG))
6450       return V;
6451 
6452   return SDValue();
6453 }
6454 
6455 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
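/// E.g. (illustrative) for i32 a = 0x00001234: the masked pattern produces
/// 0x00003412, and (bswap a) >> 16 = 0x34120000 >> 16 = 0x00003412 as well.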
6456 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
6457                                         bool DemandHighBits) {
6458   if (!LegalOperations)
6459     return SDValue();
6460 
6461   EVT VT = N->getValueType(0);
6462   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
6463     return SDValue();
6464   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6465     return SDValue();
6466 
6467   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
6468   bool LookPassAnd0 = false;
6469   bool LookPassAnd1 = false;
6470   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
6471     std::swap(N0, N1);
6472   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
6473     std::swap(N0, N1);
6474   if (N0.getOpcode() == ISD::AND) {
6475     if (!N0->hasOneUse())
6476       return SDValue();
6477     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6478     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
6479     // This is needed for X86.
6480     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
6481                   N01C->getZExtValue() != 0xFFFF))
6482       return SDValue();
6483     N0 = N0.getOperand(0);
6484     LookPassAnd0 = true;
6485   }
6486 
6487   if (N1.getOpcode() == ISD::AND) {
6488     if (!N1->hasOneUse())
6489       return SDValue();
6490     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6491     if (!N11C || N11C->getZExtValue() != 0xFF)
6492       return SDValue();
6493     N1 = N1.getOperand(0);
6494     LookPassAnd1 = true;
6495   }
6496 
6497   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
6498     std::swap(N0, N1);
6499   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
6500     return SDValue();
6501   if (!N0->hasOneUse() || !N1->hasOneUse())
6502     return SDValue();
6503 
6504   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6505   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
6506   if (!N01C || !N11C)
6507     return SDValue();
6508   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
6509     return SDValue();
6510 
6511   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
6512   SDValue N00 = N0->getOperand(0);
6513   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
6514     if (!N00->hasOneUse())
6515       return SDValue();
6516     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6517     if (!N001C || N001C->getZExtValue() != 0xFF)
6518       return SDValue();
6519     N00 = N00.getOperand(0);
6520     LookPassAnd0 = true;
6521   }
6522 
6523   SDValue N10 = N1->getOperand(0);
6524   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6525     if (!N10->hasOneUse())
6526       return SDValue();
6527     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6528     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6529     // for X86.
6530     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6531                    N101C->getZExtValue() != 0xFFFF))
6532       return SDValue();
6533     N10 = N10.getOperand(0);
6534     LookPassAnd1 = true;
6535   }
6536 
6537   if (N00 != N10)
6538     return SDValue();
6539 
6540   // Make sure everything beyond the low halfword gets set to zero since the SRL
6541   // 16 will clear the top bits.
6542   unsigned OpSizeInBits = VT.getSizeInBits();
6543   if (OpSizeInBits > 16) {
6544     // If the left-shift isn't masked out then the only way this is a bswap is
6545     // if all bits beyond the low 8 are 0. In that case the entire pattern
6546     // reduces to a left shift anyway: leave it for other parts of the combiner.
6547     if (DemandHighBits && !LookPassAnd0)
6548       return SDValue();
6549 
6550     // However, if the right shift isn't masked out then it might be because
6551     // it's not needed. See if we can spot that too. If the high bits aren't
6552     // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
6553     // upper bits to be zero.
6554     if (!LookPassAnd1) {
6555       unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
6556       if (!DAG.MaskedValueIsZero(N10,
6557                                  APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
6558         return SDValue();
6559     }
6560   }
6561 
6562   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6563   if (OpSizeInBits > 16) {
6564     SDLoc DL(N);
6565     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6566                       DAG.getConstant(OpSizeInBits - 16, DL,
6567                                       getShiftAmountTy(VT)));
6568   }
6569   return Res;
6570 }
6571 
6572 /// Return true if the specified node is an element that makes up a 32-bit
6573 /// packed halfword byteswap.
6574 /// ((x & 0x000000ff) << 8) |
6575 /// ((x & 0x0000ff00) >> 8) |
6576 /// ((x & 0x00ff0000) << 8) |
6577 /// ((x & 0xff000000) >> 8)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (!N->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       MaskByteOffset = 0; break;
  case 0xFF00:     MaskByteOffset = 1; break;
  case 0xFFFF:
    // In case demanded bits didn't clear the bits that will be shifted out.
    // This is needed for X86.
    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
      MaskByteOffset = 1;
      break;
    }
    return false;
  case 0xFF0000:   MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}

// Match 2 elements of a packed halfword bswap.
static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
  if (N.getOpcode() == ISD::OR)
    return isBSwapHWordElement(N.getOperand(0), Parts) &&
           isBSwapHWordElement(N.getOperand(1), Parts);

  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
    ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
    if (!C || C->getAPIntValue() != 16)
      return false;
    Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
    return true;
  }

  return false;
}

// Match this pattern:
//   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
// And rewrite this to:
//   (rotr (bswap A), 16)
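// For example, with A = 0xAABBCCDD:
//   (shl A, 8) & 0xff00ff00 == 0xBB00DD00
//   (srl A, 8) & 0x00ff00ff == 0x00AA00CC
// and their OR is 0xBBAADDCC == (rotr (bswap 0xAABBCCDD), 16).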
static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
                                       SelectionDAG &DAG, SDNode *N, SDValue N0,
                                       SDValue N1, EVT VT, EVT ShiftAmountTy) {
  assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
         "matchBSwapHWordOrAndAnd: expecting i32");
  if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return SDValue();
  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
    return SDValue();
  // TODO: this is too restrictive; lifting this restriction requires more
  // tests.
  if (!N0->hasOneUse() || !N1->hasOneUse())
    return SDValue();
  ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
  ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
  if (!Mask0 || !Mask1)
    return SDValue();
  if (Mask0->getAPIntValue() != 0xff00ff00 ||
      Mask1->getAPIntValue() != 0x00ff00ff)
    return SDValue();
  SDValue Shift0 = N0.getOperand(0);
  SDValue Shift1 = N1.getOperand(0);
  if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
    return SDValue();
  ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
  ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
  if (!ShiftAmt0 || !ShiftAmt1)
    return SDValue();
  if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
    return SDValue();
  if (Shift0.getOperand(0) != Shift1.getOperand(0))
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
  SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
}

/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

  // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

  // Look for either
  // (or (bswaphpair), (bswaphpair))
  // (or (or (bswaphpair), (and)), (and))
  // (or (or (and), (bswaphpair)), (and))
  SDNode *Parts[4] = {};

  if (isBSwapHWordPair(N0, Parts)) {
    // (or (or (and), (and)), (or (and), (and)))
    if (!isBSwapHWordPair(N1, Parts))
      return SDValue();
  } else if (N0.getOpcode() == ISD::OR) {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
  } else {
    return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value; \see visitANDLike().
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0->hasOneUse() || N1->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero.  Likewise for Y.
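    // For example, with C1 = 0xFF00 and C2 = 0x00FF: if the bits of X inside
    // 0x00FF and the bits of Y inside 0xFF00 are known zero, then
    //   (or (and X, 0xFF00), (and Y, 0x00FF)) == (and (or X, Y), 0xFFFF).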
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0->hasOneUse() || N1->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}

/// OR combines for which the commuted variant will be tried as well.
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
                                  SDNode *N) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == ISD::AND) {
    // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
    // TODO: Set AllowUndefs = true.
    if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0),
                             /* AllowUndefs */ false) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);

    // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
    if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1),
                             /* AllowUndefs */ false) == N1)
      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
  }

  if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
    return R;

  auto peekThroughZext = [](SDValue V) {
    if (V->getOpcode() == ISD::ZERO_EXTEND)
      return V->getOperand(0);
    return V;
  };

  // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
  if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
      N0.getOperand(0) == N1.getOperand(0) &&
      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
    return N0;

  // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
  if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
      N0.getOperand(1) == N1.getOperand(0) &&
      peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
    return N0;

  return SDValue();
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold (or c1, c2) -> c1|c2
  if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
    return C;

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      // do not return N1, because an undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting type / shuffle is legal.
    auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
    auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
    if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts, -1);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the other side is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
            continue;

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                 {N1, N0.getOperand(1)})) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}

static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  return Op;
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                            SDValue &Mask) {
  Op = stripConstantMask(DAG, Op, Mask);
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }
  return false;
}

/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (add v v) (srl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (srl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (srl (udiv v c1) c3)
///
///   (or (shl v c0) (srl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (srl v c0) (shl (srl v c1) c2)):
///     expands (srl v c0) -> (srl (srl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
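///
/// For example, on i32:
///   (or (mul v c0=24) (srl (mul v c1=3) c2=29))
/// expands (mul v 24) -> (shl (mul v 3) c3=3), since 24 == 3 << 3 and
/// 3 + 29 == 32; the caller is then left with two opposing shifts of
/// (mul v 3), i.e. a rotate.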
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  assert(
      (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
      "Existing shift must be valid as a rotate half");

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG, bool IsRotate) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  //
  // NOTE: We can only do this when matching an AND and not a general
  // funnel shift.
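  //
  // For example, with EltSize == 32 and the common idiom
  // Neg == (and (sub 0, Pos), 31): stripping the mask leaves
  // Neg == (sub 0, Pos) with MaskLoBits == 5, so NegC == 0 and NegOp1 == Pos.
  // Width is then 0, and Width.getLoBits(5) == 0 proves [A], i.e. the two
  // shift amounts are complementary modulo 32.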
  unsigned MaskLoBits = 0;
  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
      unsigned Bits = Log2_64(EltSize);
      if (NegC->getAPIntValue().getActiveBits() <= Bits &&
          ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and
  // NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
      KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
      if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
          ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
           MaskLoBits))
        Pos = Pos.getOperand(0);
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, bool HasPos,
                                       unsigned PosOpcode, unsigned NegOpcode,
                                       const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
                     /*IsRotate*/ true)) {
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg);
  }

  return SDValue();
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, bool HasPos,
                                       unsigned PosOpcode, unsigned NegOpcode,
                                       const SDLoc &DL) {
  EVT VT = N0.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // fold (or (shl x0, (*ext y)),
  //          (srl x1, (*ext (sub 32, y)))) ->
  //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
  //
  // fold (or (shl x0, (*ext (sub 32, y))),
  //          (srl x1, (*ext y))) ->
  //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                       HasPos ? Pos : Neg);
  }

  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
  // TODO: When can we use the NegOpcode case?
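  // Note: when EltBits is a power of 2 and y is in [0, EltBits), we have
  // (xor y, EltBits - 1) == EltBits - 1 - y, so e.g.
  // (srl (srl x1, 1), (xor y, 31)) computes x1 >> (32 - y) while staying
  // well-defined at y == 0, where the two shifts together discard all bits.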
  if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
    auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
      if (Op.getOpcode() != BinOpc)
        return false;
      ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
      return Cst && (Cst->getAPIntValue() == Imm);
    };

    // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
    //   -> (fshl x0, x1, y)
    if (IsBinOpImm(N1, ISD::SRL, 1) &&
        IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
        InnerPos == InnerNeg.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
      return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
    }

    // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    if (IsBinOpImm(N0, ISD::SHL, 1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }

    // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }
  }

  return SDValue();
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions,
// generate a rot[lr]. This also matches funnel shift patterns, similar to
// rotation but with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  EVT VT = LHS.getValueType();

  // The target must have at least one rotate/funnel flavor.
  // We still try to match rotate by constant pre-legalization.
  // TODO: Support pre-legalization funnel-shift by constant.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);

  // If the type is going to be promoted and the target has enabled custom
  // lowering for rotate, allow matching rotate by non-constants. Only allow
  // this for scalar types.
  if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
                                  TargetLowering::TypePromoteInteger) {
    HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
    HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
  }

  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };

  auto ApplyMasks = [&](SDValue Res) {
    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  };

  // TODO: Support pre-legalization funnel-shift by constant.
  bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
  if (!IsRotate && !(HasFSHL || HasFSHR)) {
    if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
        ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
      // Look for a disguised rotate by constant.
      // The common shifted operand X may be hidden inside another 'or'.
      SDValue X, Y;
      auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
        if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
          return false;
        if (CommonOp == Or.getOperand(0)) {
          X = CommonOp;
          Y = Or.getOperand(1);
          return true;
        }
        if (CommonOp == Or.getOperand(1)) {
          X = CommonOp;
          Y = Or.getOperand(0);
          return true;
        }
        return false;
      };

      SDValue Res;
      if (matchOr(LHSShiftArg, RHSShiftArg)) {
        // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
      } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
        // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
      } else {
        return SDValue();
      }

      return ApplyMasks(Res);
    }

    return SDValue(); // Requires funnel shift support.
  }

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
      bool UseROTL = !LegalOperations || HasROTL;
      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        UseROTL ? LHSShiftAmt : RHSShiftAmt);
    } else {
      bool UseFSHL = !LegalOperations || HasFSHL;
      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
    }

    return ApplyMasks(Res);
  }

  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
  // amount.
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended or truncated, peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  if (IsRotate && (HasROTL || HasROTR)) {
    SDValue TryL =
        MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
                          RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
    if (TryL)
      return TryL;

    SDValue TryR =
        MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
                          LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
    if (TryR)
      return TryR;
  }

  SDValue TryL =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                        LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
  if (TryL)
    return TryL;

  SDValue TryR =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                        RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
  if (TryR)
    return TryR;

  return SDValue();
}

namespace {

/// Represents the known origin of an individual byte in a load combine
/// pattern. The value of the byte is either constant zero or comes from
/// memory.
struct ByteProvider {
  // For constant zero providers Load is set to nullptr. For memory providers
  // Load represents the node which loads the byte from memory.
  // ByteOffset is the offset of the byte in the value produced by the load.
  LoadSDNode *Load = nullptr;
  unsigned ByteOffset = 0;

  ByteProvider() = default;

  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
    return ByteProvider(Load, ByteOffset);
  }

  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }

  bool isConstantZero() const { return !Load; }
  bool isMemory() const { return Load; }

  bool operator==(const ByteProvider &Other) const {
    return Other.Load == Load && Other.ByteOffset == ByteOffset;
  }

private:
  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
      : Load(Load), ByteOffset(ByteOffset) {}
};

} // end anonymous namespace

/// Recursively traverses the expression calculating the origin of the
/// requested byte of the given value. Returns None if the provider can't be
/// calculated.
///
/// For all values except the root of the expression, verifies that the value
/// has exactly one use; if not, returns None. This way, if the origin of the
/// byte is returned, it's guaranteed that the values which contribute to the
/// byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use, this function iterates over trees, not DAGs, so it never visits the
/// same node more than once.
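///
/// For example, for Op = (or (zext (load p)), (shl (zext (load p+1)), 8)) and
/// Index == 1: the OR recurses into both operands; the zext'd (load p)
/// provides constant zero for byte 1, while the shl peels one byte off and
/// yields ByteProvider{Load = (load p+1), ByteOffset = 0}.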
static const Optional<ByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
  if (Depth == 10)
    return None;

  if (!Root && !Op.hasOneUse())
    return None;

  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return None;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
    if (!LHS)
      return None;
    auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
    if (!RHS)
      return None;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return None;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return None;

    uint64_t BitShift = ShiftOp->getZExtValue();
    if (BitShift % 8 != 0)
      return None;
    uint64_t ByteShift = BitShift / 8;

    return Index < ByteShift
               ? ByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return calculateByteProvider(NarrowOp, Index, Depth + 1);
  }
  case ISD::BSWAP:
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1);
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (!L->isSimple() || L->isIndexed())
      return None;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return None;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
                 : None;
    return ByteProvider::getMemory(L, Index);
  }
  }

  return None;
}

static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}

static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}

// Check if the byte offsets we are looking at match either a big or little
// endian value load. Return true for big endian, false for little endian,
// and None if the match failed.
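// For example, offsets {0, 1, 2, 3} with FirstOffset 0 match a little endian
// load (returns false), while {3, 2, 1, 0} match a big endian load (returns
// true).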
static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
                                  int64_t FirstOffset) {
  // Endianness can be decided only when there are at least 2 bytes.
  unsigned Width = ByteOffsets.size();
  if (Width < 2)
    return None;

  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < Width; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
    BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
    if (!BigEndian && !LittleEndian)
      return None;
  }

  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
  return BigEndian;
}

static SDValue stripTruncAndExt(SDValue Value) {
  switch (Value.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    return stripTruncAndExt(Value.getOperand(0));
  }
  return Value;
}

/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
  // The matching looks for "store (trunc x)" patterns that appear early but are
  // likely to be replaced by truncating store nodes during combining.
  // TODO: If there is evidence that running this later would help, this
  //       limitation could be removed. Legality checks may need to be added
  //       for the created store and optional bswap/rotate.
  if (LegalOperations || OptLevel == CodeGenOpt::None)
    return SDValue();

  // We only handle merging simple stores of 1-4 bytes.
  // TODO: Allow unordered atomics when wider type is legal (see D66309)
  EVT MemVT = N->getMemoryVT();
  if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
      !N->isSimple() || N->isIndexed())
    return SDValue();

  // Collect all of the stores in the chain.
  SDValue Chain = N->getChain();
  SmallVector<StoreSDNode *, 8> Stores = {N};
  while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
    // All stores must be the same size to ensure that we are writing all of the
    // bytes in the wide value.
    // TODO: We could allow multiple sizes by tracking each stored byte.
    if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
        Store->isIndexed())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
  }
  // There is no reason to continue if we do not have at least a pair of stores.
  if (Stores.size() < 2)
    return SDValue();

  // Handle simple types only.
  LLVMContext &Context = *DAG.getContext();
  unsigned NumStores = Stores.size();
  unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
  unsigned WideNumBits = NumStores * NarrowNumBits;
  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
  if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
    return SDValue();

  // Check if all bytes of the source value that we are looking at are stored
  // to the same base address. Collect offsets from the Base address into
  // OffsetMap.
7898   SDValue SourceValue;
7899   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7900   int64_t FirstOffset = INT64_MAX;
7901   StoreSDNode *FirstStore = nullptr;
7902   Optional<BaseIndexOffset> Base;
7903   for (auto Store : Stores) {
7904     // All the stores store different parts of the CombinedValue. A truncate is
7905     // required to get the partial value.
7906     SDValue Trunc = Store->getValue();
7907     if (Trunc.getOpcode() != ISD::TRUNCATE)
7908       return SDValue();
7909     // Other than the first/last part, a shift operation is required to get the
7910     // offset.
7911     int64_t Offset = 0;
7912     SDValue WideVal = Trunc.getOperand(0);
7913     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7914         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7915       // The shift amount must be a constant multiple of the narrow type.
7916       // It is translated to the offset address in the wide source value "y".
7917       //
7918       // x = srl y, ShiftAmtC
7919       // i8 z = trunc x
7920       // store z, ...
7921       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7922       if (ShiftAmtC % NarrowNumBits != 0)
7923         return SDValue();
7924 
7925       Offset = ShiftAmtC / NarrowNumBits;
7926       WideVal = WideVal.getOperand(0);
7927     }
7928 
7929     // Stores must share the same source value with different offsets.
7930     // Truncate and extends should be stripped to get the single source value.
7931     if (!SourceValue)
7932       SourceValue = WideVal;
7933     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7934       return SDValue();
7935     else if (SourceValue.getValueType() != WideVT) {
7936       if (WideVal.getValueType() == WideVT ||
7937           WideVal.getScalarValueSizeInBits() >
7938               SourceValue.getScalarValueSizeInBits())
7939         SourceValue = WideVal;
7940       // Give up if the source value type is smaller than the store size.
7941       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7942         return SDValue();
7943     }
7944 
7945     // Stores must share the same base address.
7946     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7947     int64_t ByteOffsetFromBase = 0;
7948     if (!Base)
7949       Base = Ptr;
7950     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7951       return SDValue();
7952 
7953     // Remember the first store.
7954     if (ByteOffsetFromBase < FirstOffset) {
7955       FirstStore = Store;
7956       FirstOffset = ByteOffsetFromBase;
7957     }
7958     // Map the offset in the store and the offset in the combined value, and
7959     // early return if it has been set before.
7960     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7961       return SDValue();
7962     OffsetMap[Offset] = ByteOffsetFromBase;
7963   }
7964 
7965   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7966   assert(FirstStore && "First store must be set");
7967 
7968   // Check that a store of the wide type is both allowed and fast on the target
7969   const DataLayout &Layout = DAG.getDataLayout();
7970   bool Fast = false;
7971   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7972                                         *FirstStore->getMemOperand(), &Fast);
7973   if (!Allowed || !Fast)
7974     return SDValue();
7975 
7976   // Check if the pieces of the value are going to the expected places in memory
7977   // to merge the stores.
7978   auto checkOffsets = [&](bool MatchLittleEndian) {
7979     if (MatchLittleEndian) {
7980       for (unsigned i = 0; i != NumStores; ++i)
7981         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7982           return false;
7983     } else { // MatchBigEndian by reversing loop counter.
7984       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7985         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7986           return false;
7987     }
7988     return true;
7989   };
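  // For example (illustrative): four i8 pieces with FirstOffset F match
  // little endian iff OffsetMap is {F, F+1, F+2, F+3}, and big endian iff it
  // is {F+3, F+2, F+1, F}.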
7990 
7991   // Check if the offsets line up for the native data layout of this target.
7992   bool NeedBswap = false;
7993   bool NeedRotate = false;
7994   if (!checkOffsets(Layout.isLittleEndian())) {
7995     // Special-case: check if byte offsets line up for the opposite endian.
7996     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7997       NeedBswap = true;
7998     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7999       NeedRotate = true;
8000     else
8001       return SDValue();
8002   }
8003 
8004   SDLoc DL(N);
8005   if (WideVT != SourceValue.getValueType()) {
8006     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
8007            "Unexpected store value to merge");
8008     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
8009   }
8010 
  // Before legalize we can introduce illegal bswaps/rotates which will later be
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
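  // For example (illustrative): an i32 whose two i16 halves were stored in
  // the order opposite to the target's endianness is repaired with a ROTR by
  // 16, since rotating by half the width swaps the halves.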
8014   if (NeedBswap) {
8015     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
8016   } else if (NeedRotate) {
8017     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
8018     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
8019     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
8020   }
8021 
8022   SDValue NewStore =
8023       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
8024                    FirstStore->getPointerInfo(), FirstStore->getAlign());
8025 
8026   // Rely on other DAG combine rules to remove the other individual stores.
8027   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
8028   return NewStore;
8029 }
8030 
8031 /// Match a pattern where a wide type scalar value is loaded by several narrow
8032 /// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
8034 ///
8035 /// Assuming little endian target:
8036 ///  i8 *a = ...
8037 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
8038 /// =>
8039 ///  i32 val = *((i32)a)
8040 ///
8041 ///  i8 *a = ...
8042 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
8043 /// =>
8044 ///  i32 val = BSWAP(*((i32)a))
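///
/// The most significant bytes may also be provided by constant zeros, in
/// which case the fold uses a zero-extending narrower load. Illustrative,
/// again assuming a little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8)
/// =>
///  i32 val = zext (*((i16)a)) to i32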
8045 ///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads), its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of the t28 load is reassociated, the load combine won't be triggered:
8052 ///             t25: i32 = add t4, Constant:i32<2>
8053 ///           t26: i64 = sign_extend t25
8054 ///        t27: i64 = add t2, t26
8055 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
8056 ///     t29: i32 = zero_extend t28
8057 ///   t32: i32 = shl t29, Constant:i8<8>
8058 /// t33: i32 = or t23, t32
/// As a possible fix, visitLoad could check whether the load can be part of a
/// load combine pattern and add the corresponding OR roots to the worklist.
8061 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
8062   assert(N->getOpcode() == ISD::OR &&
8063          "Can only match load combining against OR nodes");
8064 
8065   // Handles simple types only
8066   EVT VT = N->getValueType(0);
8067   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
8068     return SDValue();
8069   unsigned ByteWidth = VT.getSizeInBits() / 8;
8070 
8071   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
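  // Map a byte index inside the loaded value to its byte offset in memory;
  // e.g. (illustrative) byte 2 of a 4-byte load sits at memory offset 2 on a
  // little endian target but at offset 1 on a big endian one.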
8072   auto MemoryByteOffset = [&] (ByteProvider P) {
8073     assert(P.isMemory() && "Must be a memory byte provider");
8074     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for whole bytes, not bits");
8077     unsigned LoadByteWidth = LoadBitWidth / 8;
8078     return IsBigEndianTarget
8079             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
8080             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
8081   };
8082 
8083   Optional<BaseIndexOffset> Base;
8084   SDValue Chain;
8085 
8086   SmallPtrSet<LoadSDNode *, 8> Loads;
8087   Optional<ByteProvider> FirstByteProvider;
8088   int64_t FirstOffset = INT64_MAX;
8089 
  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect byte offsets from the Base address into ByteOffsets.
8092   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
8093   unsigned ZeroExtendedBytes = 0;
8094   for (int i = ByteWidth - 1; i >= 0; --i) {
8095     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
8096     if (!P)
8097       return SDValue();
8098 
8099     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
8102       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
8103         return SDValue();
8104       continue;
8105     }
8106     assert(P->isMemory() && "provenance should either be memory or zero");
8107 
8108     LoadSDNode *L = P->Load;
8109     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
8110            !L->isIndexed() &&
8111            "Must be enforced by calculateByteProvider");
8112     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
8113 
8114     // All loads must share the same chain
8115     SDValue LChain = L->getChain();
8116     if (!Chain)
8117       Chain = LChain;
8118     else if (Chain != LChain)
8119       return SDValue();
8120 
8121     // Loads must share the same base address
8122     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
8123     int64_t ByteOffsetFromBase = 0;
8124     if (!Base)
8125       Base = Ptr;
8126     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8127       return SDValue();
8128 
8129     // Calculate the offset of the current byte from the base address
8130     ByteOffsetFromBase += MemoryByteOffset(*P);
8131     ByteOffsets[i] = ByteOffsetFromBase;
8132 
8133     // Remember the first byte load
8134     if (ByteOffsetFromBase < FirstOffset) {
8135       FirstByteProvider = P;
8136       FirstOffset = ByteOffsetFromBase;
8137     }
8138 
8139     Loads.insert(L);
8140   }
8141   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
8142          "memory, so there must be at least one load which produces the value");
8143   assert(Base && "Base address of the accessed memory location must be set");
8144   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8145 
8146   bool NeedsZext = ZeroExtendedBytes > 0;
8147 
8148   EVT MemVT =
8149       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
8150 
8151   if (!MemVT.isSimple())
8152     return SDValue();
8153 
  // Before legalize we can introduce over-wide illegal loads which will later
  // be split into legal-sized loads. This enables us to combine an i64 value
  // loaded via i8 patterns into a couple of i32 loads on 32-bit targets.
8157   if (LegalOperations &&
8158       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
8159                             MemVT))
8160     return SDValue();
8161 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
8164   Optional<bool> IsBigEndian = isBigEndian(
8165       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
8166   if (!IsBigEndian)
8167     return SDValue();
8168 
8169   assert(FirstByteProvider && "must be set");
8170 
  // Ensure that the first byte is loaded from offset zero of the first load,
  // so the combined value can be loaded from the first load's address.
8173   if (MemoryByteOffset(*FirstByteProvider) != 0)
8174     return SDValue();
8175   LoadSDNode *FirstLoad = FirstByteProvider->Load;
8176 
8177   // The node we are looking at matches with the pattern, check if we can
8178   // replace it with a single (possibly zero-extended) load and bswap + shift if
8179   // needed.
8180 
8181   // If the load needs byte swap check if the target supports it
8182   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
8183 
  // Before legalize we can introduce illegal bswaps which will later be
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
8187   // We do not introduce illegal bswaps when zero-extending as this tends to
8188   // introduce too many arithmetic instructions.
8189   if (NeedsBswap && (LegalOperations || NeedsZext) &&
8190       !TLI.isOperationLegal(ISD::BSWAP, VT))
8191     return SDValue();
8192 
8193   // If we need to bswap and zero extend, we have to insert a shift. Check that
8194   // it is legal.
8195   if (NeedsBswap && NeedsZext && LegalOperations &&
8196       !TLI.isOperationLegal(ISD::SHL, VT))
8197     return SDValue();
8198 
8199   // Check that a load of the wide type is both allowed and fast on the target
8200   bool Fast = false;
8201   bool Allowed =
8202       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
8203                              *FirstLoad->getMemOperand(), &Fast);
8204   if (!Allowed || !Fast)
8205     return SDValue();
8206 
8207   SDValue NewLoad =
8208       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
8209                      Chain, FirstLoad->getBasePtr(),
8210                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
8211 
8212   // Transfer chain users from old loads to the new load.
8213   for (LoadSDNode *L : Loads)
8214     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
8215 
8216   if (!NeedsBswap)
8217     return NewLoad;
8218 
8219   SDValue ShiftedLoad =
8220       NeedsZext
8221           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
8222                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
8223                                                    SDLoc(N), LegalOperations))
8224           : NewLoad;
8225   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
8226 }
8227 
8228 // If the target has andn, bsl, or a similar bit-select instruction,
8229 // we want to unfold masked merge, with canonical pattern of:
8230 //   |        A  |  |B|
8231 //   ((x ^ y) & m) ^ y
8232 //    |  D  |
8233 // Into:
8234 //   (x & m) | (y & ~m)
8235 // If y is a constant, m is not a 'not', and the 'andn' does not work with
8236 // immediates, we unfold into a different pattern:
8237 //   ~(~x & m) & (m | y)
8238 // If x is a constant, m is a 'not', and the 'andn' does not work with
8239 // immediates, we unfold into a different pattern:
8240 //   (x | ~m) & ~(~m & ~y)
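// The merge itself can be checked bit by bit: where m is set,
// ((x ^ y) & m) ^ y yields x's bit (x ^ y ^ y == x); where m is clear, it
// yields y's bit (0 ^ y == y). Hence the bit-select forms above.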
8241 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
8242 //       the very least that breaks andnpd / andnps patterns, and because those
8243 //       patterns are simplified in IR and shouldn't be created in the DAG
8244 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
8245   assert(N->getOpcode() == ISD::XOR);
8246 
8247   // Don't touch 'not' (i.e. where y = -1).
8248   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
8249     return SDValue();
8250 
8251   EVT VT = N->getValueType(0);
8252 
8253   // There are 3 commutable operators in the pattern,
8254   // so we have to deal with 8 possible variants of the basic pattern.
8255   SDValue X, Y, M;
8256   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
8257     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
8258       return false;
8259     SDValue Xor = And.getOperand(XorIdx);
8260     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
8261       return false;
8262     SDValue Xor0 = Xor.getOperand(0);
8263     SDValue Xor1 = Xor.getOperand(1);
8264     // Don't touch 'not' (i.e. where y = -1).
8265     if (isAllOnesOrAllOnesSplat(Xor1))
8266       return false;
8267     if (Other == Xor0)
8268       std::swap(Xor0, Xor1);
8269     if (Other != Xor1)
8270       return false;
8271     X = Xor0;
8272     Y = Xor1;
8273     M = And.getOperand(XorIdx ? 0 : 1);
8274     return true;
8275   };
8276 
8277   SDValue N0 = N->getOperand(0);
8278   SDValue N1 = N->getOperand(1);
8279   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
8280       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
8281     return SDValue();
8282 
8283   // Don't do anything if the mask is constant. This should not be reachable.
8284   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
8286   if (isa<ConstantSDNode>(M.getNode()))
8287     return SDValue();
8288 
8289   // We can transform if the target has AndNot
8290   if (!TLI.hasAndNot(M))
8291     return SDValue();
8292 
8293   SDLoc DL(N);
8294 
  // If Y is a constant, check that 'andn' works with immediates, unless M is
  // a bitwise not, which would already allow ANDN to be used.
8297   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
8298     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
8299     // If not, we need to do a bit more work to make sure andn is still used.
8300     SDValue NotX = DAG.getNOT(DL, X, VT);
8301     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
8302     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
8303     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
8304     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
8305   }
8306 
8307   // If X is a constant and M is a bitwise not, check that 'andn' works with
8308   // immediates.
8309   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
8310     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
8311     // If not, we need to do a bit more work to make sure andn is still used.
8312     SDValue NotM = M.getOperand(0);
8313     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
8314     SDValue NotY = DAG.getNOT(DL, Y, VT);
8315     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
8316     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
8317     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
8318   }
8319 
8320   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
8321   SDValue NotM = DAG.getNOT(DL, M, VT);
8322   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
8323 
8324   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
8325 }
8326 
8327 SDValue DAGCombiner::visitXOR(SDNode *N) {
8328   SDValue N0 = N->getOperand(0);
8329   SDValue N1 = N->getOperand(1);
8330   EVT VT = N0.getValueType();
8331   SDLoc DL(N);
8332 
8333   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
8334   if (N0.isUndef() && N1.isUndef())
8335     return DAG.getConstant(0, DL, VT);
8336 
8337   // fold (xor x, undef) -> undef
8338   if (N0.isUndef())
8339     return N0;
8340   if (N1.isUndef())
8341     return N1;
8342 
8343   // fold (xor c1, c2) -> c1^c2
8344   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
8345     return C;
8346 
8347   // canonicalize constant to RHS
8348   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
8349       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
8350     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
8351 
8352   // fold vector ops
8353   if (VT.isVector()) {
8354     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
8355       return FoldedVOp;
8356 
8357     // fold (xor x, 0) -> x, vector edition
8358     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
8359       return N0;
8360   }
8361 
8362   // fold (xor x, 0) -> x
8363   if (isNullConstant(N1))
8364     return N0;
8365 
8366   if (SDValue NewSel = foldBinOpIntoSelect(N))
8367     return NewSel;
8368 
8369   // reassociate xor
8370   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
8371     return RXOR;
8372 
8373   // look for 'add-like' folds:
8374   // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
8375   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8376       isMinSignedConstant(N1))
8377     if (SDValue Combined = visitADDLike(N))
8378       return Combined;
8379 
8380   // fold !(x cc y) -> (x !cc y)
8381   unsigned N0Opcode = N0.getOpcode();
8382   SDValue LHS, RHS, CC;
8383   if (TLI.isConstTrueVal(N1) &&
8384       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
8385     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
8386                                                LHS.getValueType());
8387     if (!LegalOperations ||
8388         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
8389       switch (N0Opcode) {
8390       default:
8391         llvm_unreachable("Unhandled SetCC Equivalent!");
8392       case ISD::SETCC:
8393         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
8394       case ISD::SELECT_CC:
8395         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
8396                                N0.getOperand(3), NotCC);
8397       case ISD::STRICT_FSETCC:
8398       case ISD::STRICT_FSETCCS: {
8399         if (N0.hasOneUse()) {
8400           // FIXME Can we handle multiple uses? Could we token factor the chain
8401           // results from the new/old setcc?
8402           SDValue SetCC =
8403               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
8404                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
8405           CombineTo(N, SetCC);
8406           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
8407           recursivelyDeleteUnusedNodes(N0.getNode());
8408           return SDValue(N, 0); // Return N so it doesn't get rechecked!
8409         }
8410         break;
8411       }
8412       }
8413     }
8414   }
8415 
8416   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
8417   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8418       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
8419     SDValue V = N0.getOperand(0);
8420     SDLoc DL0(N0);
8421     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
8422                     DAG.getConstant(1, DL0, V.getValueType()));
8423     AddToWorklist(V.getNode());
8424     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
8425   }
8426 
8427   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
8428   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
8429       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8430     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8431     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
8432       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8433       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8434       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8435       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8436       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8437     }
8438   }
8439   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
8440   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
8441       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
8442     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
8443     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
8444       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
8445       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
8446       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
8447       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
8448       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
8449     }
8450   }
8451 
8452   // fold (not (neg x)) -> (add X, -1)
8453   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
8454   // Y is a constant or the subtract has a single use.
8455   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
8456       isNullConstant(N0.getOperand(0))) {
8457     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
8458                        DAG.getAllOnesConstant(DL, VT));
8459   }
8460 
8461   // fold (not (add X, -1)) -> (neg X)
8462   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
8463       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
8464     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
8465                        N0.getOperand(0));
8466   }
8467 
8468   // fold (xor (and x, y), y) -> (and (not x), y)
8469   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
8470     SDValue X = N0.getOperand(0);
8471     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
8472     AddToWorklist(NotX.getNode());
8473     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
8474   }
8475 
8476   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
8477     ConstantSDNode *XorC = isConstOrConstSplat(N1);
8478     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
8479     unsigned BitWidth = VT.getScalarSizeInBits();
8480     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
      // shift has been simplified to undef.
8483       uint64_t ShiftAmt = ShiftC->getLimitedValue();
8484       if (ShiftAmt < BitWidth) {
8485         APInt Ones = APInt::getAllOnes(BitWidth);
8486         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
8487         if (XorC->getAPIntValue() == Ones) {
8488           // If the xor constant is a shifted -1, do a 'not' before the shift:
8489           // xor (X << ShiftC), XorC --> (not X) << ShiftC
8490           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
8491           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
8492           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
8493         }
8494       }
8495     }
8496   }
8497 
8498   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
8499   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
8500     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
8501     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
8502     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
8503       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
8504       SDValue S0 = S.getOperand(0);
8505       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
8506         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
8507           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
8508             return DAG.getNode(ISD::ABS, DL, VT, S0);
8509     }
8510   }
8511 
8512   // fold (xor x, x) -> 0
8513   if (N0 == N1)
8514     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
8515 
8516   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
8517   // Here is a concrete example of this equivalence:
8518   // i16   x ==  14
8519   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
8520   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
8521   //
8522   // =>
8523   //
8524   // i16     ~1      == 0b1111111111111110
8525   // i16 rol(~1, 14) == 0b1011111111111111
8526   //
8527   // Some additional tips to help conceptualize this transform:
8528   // - Try to see the operation as placing a single zero in a value of all ones.
8529   // - There exists no value for x which would allow the result to contain zero.
8530   // - Values of x larger than the bitwidth are undefined and do not require a
8531   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
8533   // A rotate left of ~1 is a nice way of achieving the desired result.
8534   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
8535       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
8536     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
8537                        N0.getOperand(1));
8538   }
8539 
8540   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
8541   if (N0Opcode == N1.getOpcode())
8542     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
8543       return V;
8544 
8545   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
8546     return R;
8547   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
8548     return R;
8549 
8550   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
8551   if (SDValue MM = unfoldMaskedMerge(N))
8552     return MM;
8553 
8554   // Simplify the expression using non-local knowledge.
8555   if (SimplifyDemandedBits(SDValue(N, 0)))
8556     return SDValue(N, 0);
8557 
8558   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
8559     return Combined;
8560 
8561   return SDValue();
8562 }
8563 
8564 /// If we have a shift-by-constant of a bitwise logic op that itself has a
8565 /// shift-by-constant operand with identical opcode, we may be able to convert
8566 /// that into 2 independent shifts followed by the logic op. This is a
8567 /// throughput improvement.
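///
/// For example (illustrative):
///   srl (xor (srl X, 2), Y), 3 --> xor (srl X, 5), (srl Y, 3)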
8568 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
8569   // Match a one-use bitwise logic op.
8570   SDValue LogicOp = Shift->getOperand(0);
8571   if (!LogicOp.hasOneUse())
8572     return SDValue();
8573 
8574   unsigned LogicOpcode = LogicOp.getOpcode();
8575   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
8576       LogicOpcode != ISD::XOR)
8577     return SDValue();
8578 
8579   // Find a matching one-use shift by constant.
8580   unsigned ShiftOpcode = Shift->getOpcode();
8581   SDValue C1 = Shift->getOperand(1);
8582   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
8583   assert(C1Node && "Expected a shift with constant operand");
8584   const APInt &C1Val = C1Node->getAPIntValue();
8585   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
8586                              const APInt *&ShiftAmtVal) {
8587     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
8588       return false;
8589 
8590     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
8591     if (!ShiftCNode)
8592       return false;
8593 
8594     // Capture the shifted operand and shift amount value.
8595     ShiftOp = V.getOperand(0);
8596     ShiftAmtVal = &ShiftCNode->getAPIntValue();
8597 
8598     // Shift amount types do not have to match their operand type, so check that
8599     // the constants are the same width.
8600     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
8601       return false;
8602 
8603     // The fold is not valid if the sum of the shift values exceeds bitwidth.
8604     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
8605       return false;
8606 
8607     return true;
8608   };
8609 
8610   // Logic ops are commutative, so check each operand for a match.
8611   SDValue X, Y;
8612   const APInt *C0Val;
8613   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
8614     Y = LogicOp.getOperand(1);
8615   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
8616     Y = LogicOp.getOperand(0);
8617   else
8618     return SDValue();
8619 
8620   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
8621   SDLoc DL(Shift);
8622   EVT VT = Shift->getValueType(0);
8623   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
8624   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
8625   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
8626   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
8627   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
8628 }
8629 
8630 /// Handle transforms common to the three shifts, when the shift amount is a
8631 /// constant.
8632 /// We are looking for: (shift being one of shl/sra/srl)
8633 ///   shift (binop X, C0), C1
8634 /// And want to transform into:
8635 ///   binop (shift X, C1), (shift C0, C1)
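///
/// For example (illustrative):
///   shl (or X, 0xFF), 8 --> or (shl X, 8), 0xFF00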
8636 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8637   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8638 
8639   // Do not turn a 'not' into a regular xor.
8640   if (isBitwiseNot(N->getOperand(0)))
8641     return SDValue();
8642 
8643   // The inner binop must be one-use, since we want to replace it.
8644   SDValue LHS = N->getOperand(0);
8645   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8646     return SDValue();
8647 
8648   // TODO: This is limited to early combining because it may reveal regressions
8649   //       otherwise. But since we just checked a target hook to see if this is
8650   //       desirable, that should have filtered out cases where this interferes
8651   //       with some other pattern matching.
8652   if (!LegalTypes)
8653     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8654       return R;
8655 
8656   // We want to pull some binops through shifts, so that we have (and (shift))
8657   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8658   // thing happens with address calculations, so it's important to canonicalize
8659   // it.
8660   switch (LHS.getOpcode()) {
8661   default:
8662     return SDValue();
8663   case ISD::OR:
8664   case ISD::XOR:
8665   case ISD::AND:
8666     break;
8667   case ISD::ADD:
8668     if (N->getOpcode() != ISD::SHL)
8669       return SDValue(); // only shl(add) not sr[al](add).
8670     break;
8671   }
8672 
8673   // We require the RHS of the binop to be a constant and not opaque as well.
8674   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8675   if (!BinOpCst)
8676     return SDValue();
8677 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is a copy/select. Enable this in other cases once we figure out when it
  // is exactly profitable.
8681   SDValue BinOpLHSVal = LHS.getOperand(0);
8682   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8683                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8684                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8685                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8686   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8687                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8688 
8689   if (!IsShiftByConstant && !IsCopyOrSelect)
8690     return SDValue();
8691 
8692   if (IsCopyOrSelect && N->hasOneUse())
8693     return SDValue();
8694 
8695   // Fold the constants, shifting the binop RHS by the shift amount.
8696   SDLoc DL(N);
8697   EVT VT = N->getValueType(0);
8698   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8699                                N->getOperand(1));
8700   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8701 
8702   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8703                                  N->getOperand(1));
8704   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8705 }
8706 
8707 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8708   assert(N->getOpcode() == ISD::TRUNCATE);
8709   assert(N->getOperand(0).getOpcode() == ISD::AND);
8710 
8711   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
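  // For example (illustrative):
  //   (truncate:i16 (and i32 N00, 255)) -> (and (truncate:i16 N00), 255)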
8712   EVT TruncVT = N->getValueType(0);
8713   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8714       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8715     SDValue N01 = N->getOperand(0).getOperand(1);
8716     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8717       SDLoc DL(N);
8718       SDValue N00 = N->getOperand(0).getOperand(0);
8719       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8720       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8721       AddToWorklist(Trunc00.getNode());
8722       AddToWorklist(Trunc01.getNode());
8723       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8724     }
8725   }
8726 
8727   return SDValue();
8728 }
8729 
8730 SDValue DAGCombiner::visitRotate(SDNode *N) {
8731   SDLoc dl(N);
8732   SDValue N0 = N->getOperand(0);
8733   SDValue N1 = N->getOperand(1);
8734   EVT VT = N->getValueType(0);
8735   unsigned Bitsize = VT.getScalarSizeInBits();
8736 
8737   // fold (rot x, 0) -> x
8738   if (isNullOrNullSplat(N1))
8739     return N0;
8740 
8741   // fold (rot x, c) -> x iff (c % BitSize) == 0
8742   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8743     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8744     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8745       return N0;
8746   }
8747 
8748   // fold (rot x, c) -> (rot x, c % BitSize)
8749   bool OutOfRange = false;
8750   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8751     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8752     return true;
8753   };
8754   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8755     EVT AmtVT = N1.getValueType();
8756     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8757     if (SDValue Amt =
8758             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8759       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8760   }
8761 
8762   // rot i16 X, 8 --> bswap X
8763   auto *RotAmtC = isConstOrConstSplat(N1);
8764   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8765       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8766     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8767 
8768   // Simplify the operands using demanded-bits information.
8769   if (SimplifyDemandedBits(SDValue(N, 0)))
8770     return SDValue(N, 0);
8771 
8772   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8773   if (N1.getOpcode() == ISD::TRUNCATE &&
8774       N1.getOperand(0).getOpcode() == ISD::AND) {
8775     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8776       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8777   }
8778 
8779   unsigned NextOp = N0.getOpcode();
8780 
8781   // fold (rot* (rot* x, c2), c1)
8782   //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize)
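  // For example (illustrative, i32):
  //   rotl (rotr x, 10), 3 -> rotl x, ((3 - 10) urem 32) == rotl x, 25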
8783   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8784     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8785     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8786     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8787       EVT ShiftVT = C1->getValueType(0);
8788       bool SameSide = (N->getOpcode() == NextOp);
8789       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8790       SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8791       SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
8792                                                  {N1, BitsizeC});
8793       SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
8794                                                  {N0.getOperand(1), BitsizeC});
8795       if (Norm1 && Norm2)
8796         if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8797                 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
8798           SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8799               ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8800           return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8801                              CombinedShiftNorm);
8802         }
8803     }
8804   }
8805   return SDValue();
8806 }
8807 
8808 SDValue DAGCombiner::visitSHL(SDNode *N) {
8809   SDValue N0 = N->getOperand(0);
8810   SDValue N1 = N->getOperand(1);
8811   if (SDValue V = DAG.simplifyShift(N0, N1))
8812     return V;
8813 
8814   EVT VT = N0.getValueType();
8815   EVT ShiftVT = N1.getValueType();
8816   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8817 
8818   // fold (shl c1, c2) -> c1<<c2
8819   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8820     return C;
8821 
8822   // fold vector ops
8823   if (VT.isVector()) {
8824     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
8825       return FoldedVOp;
8826 
8827     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8828     // If setcc produces all-one true value then:
8829     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8830     if (N1CV && N1CV->isConstant()) {
8831       if (N0.getOpcode() == ISD::AND) {
8832         SDValue N00 = N0->getOperand(0);
8833         SDValue N01 = N0->getOperand(1);
8834         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8835 
8836         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8837             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8838                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8839           if (SDValue C =
8840                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8841             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8842         }
8843       }
8844     }
8845   }
8846 
8847   if (SDValue NewSel = foldBinOpIntoSelect(N))
8848     return NewSel;
8849 
8850   // if (shl x, c) is known to be zero, return 0
8851   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
8852     return DAG.getConstant(0, SDLoc(N), VT);
8853 
8854   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8855   if (N1.getOpcode() == ISD::TRUNCATE &&
8856       N1.getOperand(0).getOpcode() == ISD::AND) {
8857     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8858       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8859   }
8860 
8861   if (SimplifyDemandedBits(SDValue(N, 0)))
8862     return SDValue(N, 0);
8863 
8864   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
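  // For example (illustrative, i8):
  //   shl (shl x, 3), 2 -> shl x, 5, but shl (shl x, 5), 4 -> 0 since 5+4 >= 8.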
8865   if (N0.getOpcode() == ISD::SHL) {
8866     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8867                                           ConstantSDNode *RHS) {
8868       APInt c1 = LHS->getAPIntValue();
8869       APInt c2 = RHS->getAPIntValue();
8870       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8871       return (c1 + c2).uge(OpSizeInBits);
8872     };
8873     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8874       return DAG.getConstant(0, SDLoc(N), VT);
8875 
8876     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8877                                        ConstantSDNode *RHS) {
8878       APInt c1 = LHS->getAPIntValue();
8879       APInt c2 = RHS->getAPIntValue();
8880       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8881       return (c1 + c2).ult(OpSizeInBits);
8882     };
8883     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8884       SDLoc DL(N);
8885       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8886       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8887     }
8888   }
8889 
8890   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8891   // For this to be valid, the second form must not preserve any of the bits
8892   // that are shifted out by the inner shift in the first form.  This means
8893   // the outer shift size must be >= the number of bits added by the ext.
8894   // As a corollary, we don't care what kind of ext it is.
8895   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8896        N0.getOpcode() == ISD::ANY_EXTEND ||
8897        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8898       N0.getOperand(0).getOpcode() == ISD::SHL) {
8899     SDValue N0Op0 = N0.getOperand(0);
8900     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8901     EVT InnerVT = N0Op0.getValueType();
8902     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8903 
8904     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8905                                                          ConstantSDNode *RHS) {
8906       APInt c1 = LHS->getAPIntValue();
8907       APInt c2 = RHS->getAPIntValue();
8908       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8909       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8910              (c1 + c2).uge(OpSizeInBits);
8911     };
8912     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8913                                   /*AllowUndefs*/ false,
8914                                   /*AllowTypeMismatch*/ true))
8915       return DAG.getConstant(0, SDLoc(N), VT);
8916 
8917     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8918                                                       ConstantSDNode *RHS) {
8919       APInt c1 = LHS->getAPIntValue();
8920       APInt c2 = RHS->getAPIntValue();
8921       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8922       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8923              (c1 + c2).ult(OpSizeInBits);
8924     };
8925     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8926                                   /*AllowUndefs*/ false,
8927                                   /*AllowTypeMismatch*/ true)) {
8928       SDLoc DL(N);
8929       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8930       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8931       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8932       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8933     }
8934   }
8935 
8936   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8937   // Only fold this if the inner zext has no other uses to avoid increasing
8938   // the total number of instructions.
8939   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8940       N0.getOperand(0).getOpcode() == ISD::SRL) {
8941     SDValue N0Op0 = N0.getOperand(0);
8942     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8943 
8944     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8945       APInt c1 = LHS->getAPIntValue();
8946       APInt c2 = RHS->getAPIntValue();
8947       zeroExtendToMatch(c1, c2);
8948       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8949     };
8950     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8951                                   /*AllowUndefs*/ false,
8952                                   /*AllowTypeMismatch*/ true)) {
8953       SDLoc DL(N);
8954       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8955       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8956       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8957       AddToWorklist(NewSHL.getNode());
8958       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8959     }
8960   }
8961 
8962   if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
8963     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
8964                                            ConstantSDNode *RHS) {
8965       const APInt &LHSC = LHS->getAPIntValue();
8966       const APInt &RHSC = RHS->getAPIntValue();
8967       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
8968              LHSC.getZExtValue() <= RHSC.getZExtValue();
8969     };
8970 
8971     SDLoc DL(N);
8972 
8973     // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8974     // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
8975     if (N0->getFlags().hasExact()) {
8976       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
8977                                     /*AllowUndefs*/ false,
8978                                     /*AllowTypeMismatch*/ true)) {
8979         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8980         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
8981         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
8982       }
8983       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
8984                                     /*AllowUndefs*/ false,
8985                                     /*AllowTypeMismatch*/ true)) {
8986         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
8987         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
8988         return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
8989       }
8990     }
8991 
    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    //                               (and (srl x, (sub c1, c2)), MASK)
8994     // Only fold this if the inner shift has no other uses -- if it does,
8995     // folding this will increase the total number of instructions.
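    // For example (illustrative, i8): shl (srl x, 3), 3 -> and x, 0xF8.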
8996     if (N0.getOpcode() == ISD::SRL &&
8997         (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
8998         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8999       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9000                                     /*AllowUndefs*/ false,
9001                                     /*AllowTypeMismatch*/ true)) {
9002         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9003         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9004         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9005         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
9006         Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
9007         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9008         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9009       }
9010       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9011                                     /*AllowUndefs*/ false,
9012                                     /*AllowTypeMismatch*/ true)) {
9013         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9014         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9015         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9016         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
9017         SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9018         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9019       }
9020     }
9021   }
9022 
9023   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
9024   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
9025       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
9026     SDLoc DL(N);
9027     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
9028     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
9029     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
9030   }
9031 
9032   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
9033   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // Variant of the fold done on multiply, except that a mul by a power of 2
  // is turned into a shift.
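  // For example (illustrative): shl (add x, 3), 2 -> add (shl x, 2), 12.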
9036   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
9037       N0->hasOneUse() &&
9038       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
9039       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
9040       TLI.isDesirableToCommuteWithShift(N, Level)) {
9041     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
9042     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
9043     AddToWorklist(Shl0.getNode());
9044     AddToWorklist(Shl1.getNode());
9045     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
9046   }
9047 
9048   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
9049   if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
9050     SDValue N01 = N0.getOperand(1);
9051     if (SDValue Shl =
9052             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
9053       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
9054   }
9055 
9056   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9057   if (N1C && !N1C->isOpaque())
9058     if (SDValue NewSHL = visitShiftByConstant(N))
9059       return NewSHL;
9060 
9061   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
9062   if (N0.getOpcode() == ISD::VSCALE)
9063     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
9064       const APInt &C0 = N0.getConstantOperandAPInt(0);
9065       const APInt &C1 = NC1->getAPIntValue();
9066       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
9067     }
9068 
9069   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
9070   APInt ShlVal;
9071   if (N0.getOpcode() == ISD::STEP_VECTOR)
9072     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
9073       const APInt &C0 = N0.getConstantOperandAPInt(0);
9074       if (ShlVal.ult(C0.getBitWidth())) {
9075         APInt NewStep = C0 << ShlVal;
9076         return DAG.getStepVector(SDLoc(N), VT, NewStep);
9077       }
9078     }
9079 
9080   return SDValue();
9081 }
9082 
9083 // Transform a right shift of a multiply into a multiply-high.
9084 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
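// A constant operand is handled as well, e.g. (illustrative):
// (srl (mul (zext i32:$a to i64), 42), 32) -> (mulhu $a, 42), as long as the
// constant's active bits fit in the narrow type.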
9087 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
9088                                   const TargetLowering &TLI) {
9089   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9090          "SRL or SRA node is required here!");
9091 
9092   // Check the shift amount. Proceed with the transformation if the shift
9093   // amount is constant.
9094   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9095   if (!ShiftAmtSrc)
9096     return SDValue();
9097 
9098   SDLoc DL(N);
9099 
9100   // The operation feeding into the shift must be a multiply.
9101   SDValue ShiftOperand = N->getOperand(0);
9102   if (ShiftOperand.getOpcode() != ISD::MUL)
9103     return SDValue();
9104 
9105   // Both operands must be equivalent extend nodes.
9106   SDValue LeftOp = ShiftOperand.getOperand(0);
9107   SDValue RightOp = ShiftOperand.getOperand(1);
9108 
9109   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9110   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9111 
9112   if (!IsSignExt && !IsZeroExt)
9113     return SDValue();
9114 
9115   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9116   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9117 
9118   SDValue MulhRightOp;
9119   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9120     unsigned ActiveBits = IsSignExt
9121                               ? Constant->getAPIntValue().getMinSignedBits()
9122                               : Constant->getAPIntValue().getActiveBits();
9123     if (ActiveBits > NarrowVTSize)
9124       return SDValue();
9125     MulhRightOp = DAG.getConstant(
9126         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9127         NarrowVT);
9128   } else {
9129     if (LeftOp.getOpcode() != RightOp.getOpcode())
9130       return SDValue();
9131     // Check that the two extend nodes are the same type.
9132     if (NarrowVT != RightOp.getOperand(0).getValueType())
9133       return SDValue();
9134     MulhRightOp = RightOp.getOperand(0);
9135   }
9136 
9137   EVT WideVT = LeftOp.getValueType();
9138   // Proceed with the transformation if the wide types match.
9139   assert((WideVT == RightOp.getValueType()) &&
9140          "Cannot have a multiply node with two different operand types.");
9141 
9142   // Proceed with the transformation if the wide type is twice as large
9143   // as the narrow type.
9144   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9145     return SDValue();
9146 
9147   // Check the shift amount with the narrow type size.
9148   // Proceed with the transformation if the shift amount is the width
9149   // of the narrow type.
9150   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9151   if (ShiftAmt != NarrowVTSize)
9152     return SDValue();
9153 
  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
9156   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
9157 
9158   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
9159   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
9160     return SDValue();
9161 
9162   SDValue Result =
9163       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
9164   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
9165                                      : DAG.getZExtOrTrunc(Result, DL, WideVT));
9166 }
9167 
9168 SDValue DAGCombiner::visitSRA(SDNode *N) {
9169   SDValue N0 = N->getOperand(0);
9170   SDValue N1 = N->getOperand(1);
9171   if (SDValue V = DAG.simplifyShift(N0, N1))
9172     return V;
9173 
9174   EVT VT = N0.getValueType();
9175   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9176 
  // fold (sra c1, c2) -> c1>>c2
9178   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
9179     return C;
9180 
9181   // Arithmetic shifting an all-sign-bit value is a no-op.
9182   // fold (sra 0, x) -> 0
9183   // fold (sra -1, x) -> -1
9184   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
9185     return N0;
9186 
9187   // fold vector ops
9188   if (VT.isVector())
9189     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9190       return FoldedVOp;
9191 
9192   if (SDValue NewSel = foldBinOpIntoSelect(N))
9193     return NewSel;
9194 
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target
  // supports sext_inreg.
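  // For example (illustrative, i32):
  //   sra (shl x, 24), 24 -> sext_inreg x, i8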
9197   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9198   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
9199     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
9200     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
9201     if (VT.isVector())
9202       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
9203                                VT.getVectorElementCount());
9204     if (!LegalOperations ||
9205         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
9206         TargetLowering::Legal)
9207       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9208                          N0.getOperand(0), DAG.getValueType(ExtVT));
9209     // Even if we can't convert to sext_inreg, we might be able to remove
9210     // this shift pair if the input is already sign extended.
9211     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
9212       return N0.getOperand(0);
9213   }
9214 
9215   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
9216   // clamp (add c1, c2) to max shift.
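  // For example (illustrative, i8): sra (sra x, 5), 6 -> sra x, 7.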
9217   if (N0.getOpcode() == ISD::SRA) {
9218     SDLoc DL(N);
9219     EVT ShiftVT = N1.getValueType();
9220     EVT ShiftSVT = ShiftVT.getScalarType();
9221     SmallVector<SDValue, 16> ShiftValues;
9222 
9223     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9224       APInt c1 = LHS->getAPIntValue();
9225       APInt c2 = RHS->getAPIntValue();
9226       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9227       APInt Sum = c1 + c2;
9228       unsigned ShiftSum =
9229           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
9230       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
9231       return true;
9232     };
9233     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
9234       SDValue ShiftValue;
9235       if (N1.getOpcode() == ISD::BUILD_VECTOR)
9236         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
9237       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
9238         assert(ShiftValues.size() == 1 &&
9239                "Expected matchBinaryPredicate to return one element for "
9240                "SPLAT_VECTORs");
9241         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
9242       } else
9243         ShiftValue = ShiftValues[0];
9244       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
9245     }
9246   }
9247 
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, the sext(trunc) form is likely to
  // result in better code than the shl/sra pair.
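  // Illustrative example: on i32 with m = 4 and a shift amount of 24 (n = 8),
  // (sra (shl X, 4), 24) becomes (sext (trunc (srl X, 20) to i8)): the
  // residual shift is 24 - 4 = 20 and the truncate keeps 32 - 24 = 8 bits.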
9253   if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
9255     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
9256     if (N01C) {
9257       LLVMContext &Ctx = *DAG.getContext();
9258       // Determine what the truncate's result bitsize and type would be.
9259       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
9260 
9261       if (VT.isVector())
9262         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9263 
9264       // Determine the residual right-shift amount.
9265       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
9266 
      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncate-to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
9271       if ((ShiftAmt > 0) &&
9272           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
9273           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
9274           TLI.isTruncateFree(VT, TruncVT)) {
9275         SDLoc DL(N);
9276         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
9277             getShiftAmountTy(N0.getOperand(0).getValueType()));
9278         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
9279                                     N0.getOperand(0), Amt);
9280         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
9281                                     Shift);
9282         return DAG.getNode(ISD::SIGN_EXTEND, DL,
9283                            N->getValueType(0), Trunc);
9284       }
9285     }
9286   }
9287 
9288   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
9289   //   sra (add (shl X, N1C), AddC), N1C -->
9290   //   sext (add (trunc X to (width - N1C)), AddC')
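  // Illustrative example: on i32 with N1C = 16,
  //   sra (add (shl X, 16), AddC), 16
  // becomes sext (add (trunc X to i16), trunc (AddC >>u 16)): the low 16 bits
  // of (shl X, 16) are zero, so the add cannot carry into the high half.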
9291   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
9292       N0.getOperand(0).getOpcode() == ISD::SHL &&
9293       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
9294     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
9295       SDValue Shl = N0.getOperand(0);
9296       // Determine what the truncate's type would be and ask the target if that
9297       // is a free operation.
9298       LLVMContext &Ctx = *DAG.getContext();
9299       unsigned ShiftAmt = N1C->getZExtValue();
9300       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
9301       if (VT.isVector())
9302         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
9303 
9304       // TODO: The simple type check probably belongs in the default hook
9305       //       implementation and/or target-specific overrides (because
9306       //       non-simple types likely require masking when legalized), but that
9307       //       restriction may conflict with other transforms.
9308       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
9309           TLI.isTruncateFree(VT, TruncVT)) {
9310         SDLoc DL(N);
9311         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
9312         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
9313                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
9314         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
9315         return DAG.getSExtOrTrunc(Add, DL, VT);
9316       }
9317     }
9318   }
9319 
9320   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
9321   if (N1.getOpcode() == ISD::TRUNCATE &&
9322       N1.getOperand(0).getOpcode() == ISD::AND) {
9323     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9324       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
9325   }
9326 
9327   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
9328   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
9329   //      if c1 is equal to the number of bits the trunc removes
9330   // TODO - support non-uniform vector shift amounts.
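  // Illustrative example: (sra (trunc (sra (i64 x), 32) to i32), c2) becomes
  // (trunc (sra x, add (32, c2)) to i32), since the truncate removes exactly
  // the 32 bits the inner shift already moved out.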
9331   if (N0.getOpcode() == ISD::TRUNCATE &&
9332       (N0.getOperand(0).getOpcode() == ISD::SRL ||
9333        N0.getOperand(0).getOpcode() == ISD::SRA) &&
9334       N0.getOperand(0).hasOneUse() &&
9335       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
9336     SDValue N0Op0 = N0.getOperand(0);
9337     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
9338       EVT LargeVT = N0Op0.getValueType();
9339       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
9340       if (LargeShift->getAPIntValue() == TruncBits) {
9341         SDLoc DL(N);
9342         EVT LargeShiftVT = getShiftAmountTy(LargeVT);
9343         SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
9344         Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
9345                           DAG.getConstant(TruncBits, DL, LargeShiftVT));
9346         SDValue SRA =
9347             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
9348         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
9349       }
9350     }
9351   }
9352 
9353   // Simplify, based on bits shifted out of the LHS.
9354   if (SimplifyDemandedBits(SDValue(N, 0)))
9355     return SDValue(N, 0);
9356 
9357   // If the sign bit is known to be zero, switch this to a SRL.
9358   if (DAG.SignBitIsZero(N0))
9359     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
9360 
9361   if (N1C && !N1C->isOpaque())
9362     if (SDValue NewSRA = visitShiftByConstant(N))
9363       return NewSRA;
9364 
9365   // Try to transform this shift into a multiply-high if
9366   // it matches the appropriate pattern detected in combineShiftToMULH.
9367   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9368     return MULH;
9369 
9370   // Attempt to convert a sra of a load into a narrower sign-extending load.
9371   if (SDValue NarrowLoad = reduceLoadWidth(N))
9372     return NarrowLoad;
9373 
9374   return SDValue();
9375 }
9376 
9377 SDValue DAGCombiner::visitSRL(SDNode *N) {
9378   SDValue N0 = N->getOperand(0);
9379   SDValue N1 = N->getOperand(1);
9380   if (SDValue V = DAG.simplifyShift(N0, N1))
9381     return V;
9382 
9383   EVT VT = N0.getValueType();
9384   EVT ShiftVT = N1.getValueType();
9385   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9386 
9387   // fold (srl c1, c2) -> c1 >>u c2
9388   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
9389     return C;
9390 
9391   // fold vector ops
9392   if (VT.isVector())
9393     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9394       return FoldedVOp;
9395 
9396   if (SDValue NewSel = foldBinOpIntoSelect(N))
9397     return NewSel;
9398 
9399   // if (srl x, c) is known to be zero, return 0
9400   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9401   if (N1C &&
9402       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9403     return DAG.getConstant(0, SDLoc(N), VT);
9404 
9405   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
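  // Illustrative examples on i8: (srl (srl x, 5), 4) shifts out every bit and
  // folds to 0, while (srl (srl x, 2), 3) folds to (srl x, 5).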
9406   if (N0.getOpcode() == ISD::SRL) {
9407     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9408                                           ConstantSDNode *RHS) {
9409       APInt c1 = LHS->getAPIntValue();
9410       APInt c2 = RHS->getAPIntValue();
9411       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9412       return (c1 + c2).uge(OpSizeInBits);
9413     };
9414     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9415       return DAG.getConstant(0, SDLoc(N), VT);
9416 
9417     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9418                                        ConstantSDNode *RHS) {
9419       APInt c1 = LHS->getAPIntValue();
9420       APInt c2 = RHS->getAPIntValue();
9421       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9422       return (c1 + c2).ult(OpSizeInBits);
9423     };
9424     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9425       SDLoc DL(N);
9426       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9427       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
9428     }
9429   }
9430 
9431   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
9432       N0.getOperand(0).getOpcode() == ISD::SRL) {
9433     SDValue InnerShift = N0.getOperand(0);
9434     // TODO - support non-uniform vector shift amounts.
9435     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
9436       uint64_t c1 = N001C->getZExtValue();
9437       uint64_t c2 = N1C->getZExtValue();
9438       EVT InnerShiftVT = InnerShift.getValueType();
9439       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
9440       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
9441       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 == InnerShiftSize.
9443       if (c1 + OpSizeInBits == InnerShiftSize) {
9444         SDLoc DL(N);
9445         if (c1 + c2 >= InnerShiftSize)
9446           return DAG.getConstant(0, DL, VT);
9447         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9448         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9449                                        InnerShift.getOperand(0), NewShiftAmt);
9450         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
9451       }
9452       // In the more general case, we can clear the high bits after the shift:
9453       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
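      // Illustrative example: with i64 x truncated to i32, c1 = 8 and c2 = 4,
      // srl (trunc (srl x, 8)), 4 becomes
      // trunc (and (srl x, 12), 0x0FFFFFFF), keeping the 28 low bits.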
9454       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
9455           c1 + c2 < InnerShiftSize) {
9456         SDLoc DL(N);
9457         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
9458         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
9459                                        InnerShift.getOperand(0), NewShiftAmt);
9460         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
9461                                                             OpSizeInBits - c2),
9462                                        DL, InnerShiftVT);
9463         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
9464         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
9465       }
9466     }
9467   }
9468 
  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
  //                               (and (srl x, (sub c2, c1)), MASK)
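  // Illustrative example: on i8, (srl (shl x, 3), 1) becomes
  // (and (shl x, 2), 0x7C), where the mask is (-1 >>u 3) << 2.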
9471   if (N0.getOpcode() == ISD::SHL &&
9472       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
9473       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
9474     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9475                                            ConstantSDNode *RHS) {
9476       const APInt &LHSC = LHS->getAPIntValue();
9477       const APInt &RHSC = RHS->getAPIntValue();
9478       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9479              LHSC.getZExtValue() <= RHSC.getZExtValue();
9480     };
9481     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
9482                                   /*AllowUndefs*/ false,
9483                                   /*AllowTypeMismatch*/ true)) {
9484       SDLoc DL(N);
9485       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9486       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
9487       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9488       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
9489       Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
9490       SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
9491       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9492     }
9493     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9494                                   /*AllowUndefs*/ false,
9495                                   /*AllowTypeMismatch*/ true)) {
9496       SDLoc DL(N);
9497       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
9498       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
9499       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
9500       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
9501       SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
9502       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
9503     }
9504   }
9505 
9506   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
9507   // TODO - support non-uniform vector shift amounts.
9508   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
9509     // Shifting in all undef bits?
9510     EVT SmallVT = N0.getOperand(0).getValueType();
9511     unsigned BitSize = SmallVT.getScalarSizeInBits();
9512     if (N1C->getAPIntValue().uge(BitSize))
9513       return DAG.getUNDEF(VT);
9514 
9515     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
9516       uint64_t ShiftAmt = N1C->getZExtValue();
9517       SDLoc DL0(N0);
9518       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
9519                                        N0.getOperand(0),
9520                           DAG.getConstant(ShiftAmt, DL0,
9521                                           getShiftAmountTy(SmallVT)));
9522       AddToWorklist(SmallShift.getNode());
9523       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
9524       SDLoc DL(N);
9525       return DAG.getNode(ISD::AND, DL, VT,
9526                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
9527                          DAG.getConstant(Mask, DL, VT));
9528     }
9529   }
9530 
9531   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
9532   // bit, which is unmodified by sra.
9533   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
9534     if (N0.getOpcode() == ISD::SRA)
9535       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
9536   }
9537 
  // fold (srl (ctlz x), "5") -> (x == 0), where "5" is log2 of the bitwidth.
9539   if (N1C && N0.getOpcode() == ISD::CTLZ &&
9540       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
9541     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
9542 
9543     // If any of the input bits are KnownOne, then the input couldn't be all
9544     // zeros, thus the result of the srl will always be zero.
9545     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
9546 
    // If all of the bits input to the ctlz node are known to be zero, then
9548     // the result of the ctlz is "32" and the result of the shift is one.
9549     APInt UnknownBits = ~Known.Zero;
9550     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
9551 
9552     // Otherwise, check to see if there is exactly one bit input to the ctlz.
9553     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node. If this bit is set, the SRL will
      // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
9557       // to an SRL/XOR pair, which is likely to simplify more.
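      // Illustrative example: on i32 where only bit 4 of x may be set,
      // (srl (ctlz x), 5) is 1 iff x == 0, which equals ((x >>u 4) ^ 1).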
9558       unsigned ShAmt = UnknownBits.countTrailingZeros();
9559       SDValue Op = N0.getOperand(0);
9560 
9561       if (ShAmt) {
9562         SDLoc DL(N0);
9563         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
9564                   DAG.getConstant(ShAmt, DL,
9565                                   getShiftAmountTy(Op.getValueType())));
9566         AddToWorklist(Op.getNode());
9567       }
9568 
9569       SDLoc DL(N);
9570       return DAG.getNode(ISD::XOR, DL, VT,
9571                          Op, DAG.getConstant(1, DL, VT));
9572     }
9573   }
9574 
9575   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
9576   if (N1.getOpcode() == ISD::TRUNCATE &&
9577       N1.getOperand(0).getOpcode() == ISD::AND) {
9578     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9579       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
9580   }
9581 
9582   // fold operands of srl based on knowledge that the low bits are not
9583   // demanded.
9584   if (SimplifyDemandedBits(SDValue(N, 0)))
9585     return SDValue(N, 0);
9586 
9587   if (N1C && !N1C->isOpaque())
9588     if (SDValue NewSRL = visitShiftByConstant(N))
9589       return NewSRL;
9590 
9591   // Attempt to convert a srl of a load into a narrower zero-extending load.
9592   if (SDValue NarrowLoad = reduceLoadWidth(N))
9593     return NarrowLoad;
9594 
9595   // Here is a common situation. We want to optimize:
9596   //
9597   //   %a = ...
9598   //   %b = and i32 %a, 2
9599   //   %c = srl i32 %b, 1
9600   //   brcond i32 %c ...
9601   //
9602   // into
9603   //
9604   //   %a = ...
9605   //   %b = and %a, 2
  //   %c = setcc ne %b, 0
9607   //   brcond %c ...
9608   //
  // However, after the source operand of the SRL is optimized into an AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND
  // into the worklist.
9612   if (N->hasOneUse()) {
9613     SDNode *Use = *N->use_begin();
9614     if (Use->getOpcode() == ISD::BRCOND)
9615       AddToWorklist(Use);
9616     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
9618       Use = *Use->use_begin();
9619       if (Use->getOpcode() == ISD::BRCOND)
9620         AddToWorklist(Use);
9621     }
9622   }
9623 
9624   // Try to transform this shift into a multiply-high if
9625   // it matches the appropriate pattern detected in combineShiftToMULH.
9626   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
9627     return MULH;
9628 
9629   return SDValue();
9630 }
9631 
9632 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
9633   EVT VT = N->getValueType(0);
9634   SDValue N0 = N->getOperand(0);
9635   SDValue N1 = N->getOperand(1);
9636   SDValue N2 = N->getOperand(2);
9637   bool IsFSHL = N->getOpcode() == ISD::FSHL;
9638   unsigned BitWidth = VT.getScalarSizeInBits();
9639 
9640   // fold (fshl N0, N1, 0) -> N0
9641   // fold (fshr N0, N1, 0) -> N1
9642   if (isPowerOf2_32(BitWidth))
9643     if (DAG.MaskedValueIsZero(
9644             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
9645       return IsFSHL ? N0 : N1;
9646 
9647   auto IsUndefOrZero = [](SDValue V) {
9648     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
9649   };
9650 
9651   // TODO - support non-uniform vector shift amounts.
9652   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
9653     EVT ShAmtTy = N2.getValueType();
9654 
9655     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
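    // Illustrative example: (fshl i8 x, y, 11) becomes (fshl x, y, 3), since
    // funnel-shift amounts are interpreted modulo the bitwidth.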
9656     if (Cst->getAPIntValue().uge(BitWidth)) {
9657       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
9658       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
9659                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
9660     }
9661 
9662     unsigned ShAmt = Cst->getZExtValue();
9663     if (ShAmt == 0)
9664       return IsFSHL ? N0 : N1;
9665 
9666     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
9667     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
9668     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
9669     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
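    // Illustrative example: on i8, fshl(0, y, 3) is (0 << 3) | (y >>u 5), so
    // it reduces to lshr(y, 5).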
9670     if (IsUndefOrZero(N0))
9671       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
9672                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
9673                                          SDLoc(N), ShAmtTy));
9674     if (IsUndefOrZero(N1))
9675       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
9676                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
9677                                          SDLoc(N), ShAmtTy));
9678 
9679     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9680     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
9681     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
9683     // TODO - permit LHS EXTLOAD if extensions are shifted out.
9684     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
9685         !DAG.getDataLayout().isBigEndian()) {
9686       auto *LHS = dyn_cast<LoadSDNode>(N0);
9687       auto *RHS = dyn_cast<LoadSDNode>(N1);
9688       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
9689           LHS->getAddressSpace() == RHS->getAddressSpace() &&
9690           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
9691           ISD::isNON_EXTLoad(LHS)) {
9692         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
9693           SDLoc DL(RHS);
9694           uint64_t PtrOff =
9695               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
9696           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
9697           bool Fast = false;
9698           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9699                                      RHS->getAddressSpace(), NewAlign,
9700                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9701               Fast) {
9702             SDValue NewPtr = DAG.getMemBasePlusOffset(
9703                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9704             AddToWorklist(NewPtr.getNode());
9705             SDValue Load = DAG.getLoad(
9706                 VT, DL, RHS->getChain(), NewPtr,
9707                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9708                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9709             // Replace the old load's chain with the new load's chain.
9710             WorklistRemover DeadNodes(*this);
9711             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9712             return Load;
9713           }
9714         }
9715       }
9716     }
9717   }
9718 
9719   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9720   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
9722   // TODO: when is it worth doing SUB(BW, N2) as well?
9723   if (isPowerOf2_32(BitWidth)) {
9724     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9725     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9726       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9727     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9728       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9729   }
9730 
9731   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9732   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the funnel
  // shift is legal as well, we might be better off avoiding a non-constant
  // (BW - N2).
9735   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9736   if (N0 == N1 && hasOperation(RotOpc, VT))
9737     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9738 
9739   // Simplify, based on bits shifted out of N0/N1.
9740   if (SimplifyDemandedBits(SDValue(N, 0)))
9741     return SDValue(N, 0);
9742 
9743   return SDValue();
9744 }
9745 
9746 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
9747   SDValue N0 = N->getOperand(0);
9748   SDValue N1 = N->getOperand(1);
9749   if (SDValue V = DAG.simplifyShift(N0, N1))
9750     return V;
9751 
9752   EVT VT = N0.getValueType();
9753 
9754   // fold (*shlsat c1, c2) -> c1<<c2
9755   if (SDValue C =
9756           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
9757     return C;
9758 
9759   ConstantSDNode *N1C = isConstOrConstSplat(N1);
9760 
9761   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
9762     // fold (sshlsat x, c) -> (shl x, c)
9763     if (N->getOpcode() == ISD::SSHLSAT && N1C &&
9764         N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
9765       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9766 
9767     // fold (ushlsat x, c) -> (shl x, c)
9768     if (N->getOpcode() == ISD::USHLSAT && N1C &&
9769         N1C->getAPIntValue().ule(
9770             DAG.computeKnownBits(N0).countMinLeadingZeros()))
9771       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
9772   }
9773 
9774   return SDValue();
9775 }
9776 
// Given an ABS node, detect the following pattern:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// Generates a UABD/SABD instruction.
9780 static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG,
9781                                const TargetLowering &TLI) {
9782   SDValue AbsOp1 = N->getOperand(0);
9783   SDValue Op0, Op1;
9784 
9785   if (AbsOp1.getOpcode() != ISD::SUB)
9786     return SDValue();
9787 
9788   Op0 = AbsOp1.getOperand(0);
9789   Op1 = AbsOp1.getOperand(1);
9790 
9791   unsigned Opc0 = Op0.getOpcode();
9792   // Check if the operands of the sub are (zero|sign)-extended.
9793   if (Opc0 != Op1.getOpcode() ||
9794       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
9795     return SDValue();
9796 
9797   EVT VT = N->getValueType(0);
9798   EVT VT1 = Op0.getOperand(0).getValueType();
9799   EVT VT2 = Op1.getOperand(0).getValueType();
9800   unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU;
9801 
9802   // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
9803   // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
9804   // NOTE: Extensions must be equivalent.
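  // Illustrative example: for i8 x = -128 and y = 127, abds(x, y) is 255,
  // and zext to i32 gives 255 == |sext(x) - sext(y)|.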
9805   if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
9806     Op0 = Op0.getOperand(0);
9807     Op1 = Op1.getOperand(0);
9808     SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
9809     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
9810   }
9811 
9812   // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
9813   // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
9814   if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
9815     return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
9816 
9817   return SDValue();
9818 }
9819 
9820 SDValue DAGCombiner::visitABS(SDNode *N) {
9821   SDValue N0 = N->getOperand(0);
9822   EVT VT = N->getValueType(0);
9823 
9824   // fold (abs c1) -> c2
9825   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9826     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9827   // fold (abs (abs x)) -> (abs x)
9828   if (N0.getOpcode() == ISD::ABS)
9829     return N0;
  // fold (abs x) -> x iff x is known non-negative
9831   if (DAG.SignBitIsZero(N0))
9832     return N0;
9833 
9834   if (SDValue ABD = combineABSToABD(N, DAG, TLI))
9835     return ABD;
9836 
9837   return SDValue();
9838 }
9839 
9840 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9841   SDValue N0 = N->getOperand(0);
9842   EVT VT = N->getValueType(0);
9843   SDLoc DL(N);
9844 
9845   // fold (bswap c1) -> c2
9846   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9847     return DAG.getNode(ISD::BSWAP, DL, VT, N0);
9848   // fold (bswap (bswap x)) -> x
9849   if (N0.getOpcode() == ISD::BSWAP)
9850     return N0.getOperand(0);
9851 
9852   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
9853   // isn't supported, it will be expanded to bswap followed by a manual reversal
9854   // of bits in each byte. By placing bswaps before bitreverse, we can remove
9855   // the two bswaps if the bitreverse gets expanded.
9856   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
9857     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9858     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
9859   }
9860 
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. the lower half of (shl x, c) is known zero)
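  // Illustrative example: on i32, bswap (shl x, 16) only sees the low 16 bits
  // of x, placed in its high half, so it equals zext (bswap (trunc x to i16)).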
9863   unsigned BW = VT.getScalarSizeInBits();
9864   if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
9865     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9866     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
9867     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
9868         ShAmt->getZExtValue() >= (BW / 2) &&
9869         (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
9870         TLI.isTruncateFree(VT, HalfVT) &&
9871         (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
9872       SDValue Res = N0.getOperand(0);
9873       if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
9874         Res = DAG.getNode(ISD::SHL, DL, VT, Res,
9875                           DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
9876       Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
9877       Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
9878       return DAG.getZExtOrTrunc(Res, DL, VT);
9879     }
9880   }
9881 
9882   // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
9883   // inverse-shift-of-bswap:
9884   // bswap (X u<< C) --> (bswap X) u>> C
9885   // bswap (X u>> C) --> (bswap X) u<< C
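  // Illustrative example: on i32, bswap (x u<< 8) moves the shifted-in zero
  // byte to the top, which is exactly (bswap x) u>> 8.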
9886   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9887       N0.hasOneUse()) {
9888     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9889     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
9890         ShAmt->getZExtValue() % 8 == 0) {
9891       SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
9892       unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
9893       return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
9894     }
9895   }
9896 
9897   return SDValue();
9898 }
9899 
9900 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9901   SDValue N0 = N->getOperand(0);
9902   EVT VT = N->getValueType(0);
9903 
9904   // fold (bitreverse c1) -> c2
9905   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9906     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9907   // fold (bitreverse (bitreverse x)) -> x
9908   if (N0.getOpcode() == ISD::BITREVERSE)
9909     return N0.getOperand(0);
9910   return SDValue();
9911 }
9912 
9913 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9914   SDValue N0 = N->getOperand(0);
9915   EVT VT = N->getValueType(0);
9916 
9917   // fold (ctlz c1) -> c2
9918   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9919     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9920 
9921   // If the value is known never to be zero, switch to the undef version.
9922   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9923     if (DAG.isKnownNeverZero(N0))
9924       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9925   }
9926 
9927   return SDValue();
9928 }
9929 
9930 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9931   SDValue N0 = N->getOperand(0);
9932   EVT VT = N->getValueType(0);
9933 
9934   // fold (ctlz_zero_undef c1) -> c2
9935   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9936     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9937   return SDValue();
9938 }
9939 
9940 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9941   SDValue N0 = N->getOperand(0);
9942   EVT VT = N->getValueType(0);
9943 
9944   // fold (cttz c1) -> c2
9945   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9946     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9947 
9948   // If the value is known never to be zero, switch to the undef version.
9949   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9950     if (DAG.isKnownNeverZero(N0))
9951       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9952   }
9953 
9954   return SDValue();
9955 }
9956 
9957 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9958   SDValue N0 = N->getOperand(0);
9959   EVT VT = N->getValueType(0);
9960 
9961   // fold (cttz_zero_undef c1) -> c2
9962   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9963     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9964   return SDValue();
9965 }
9966 
9967 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9968   SDValue N0 = N->getOperand(0);
9969   EVT VT = N->getValueType(0);
9970 
9971   // fold (ctpop c1) -> c2
9972   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9973     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9974   return SDValue();
9975 }
9976 
9977 // FIXME: This should be checking for no signed zeros on individual operands, as
9978 // well as no nans.
9979 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9980                                          SDValue RHS,
9981                                          const TargetLowering &TLI) {
9982   const TargetOptions &Options = DAG.getTarget().Options;
9983   EVT VT = LHS.getValueType();
9984 
9985   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9986          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9987          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9988 }
9989 
9990 /// Generate Min/Max node
9991 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9992                                    SDValue RHS, SDValue True, SDValue False,
9993                                    ISD::CondCode CC, const TargetLowering &TLI,
9994                                    SelectionDAG &DAG) {
9995   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9996     return SDValue();
9997 
9998   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9999   switch (CC) {
10000   case ISD::SETOLT:
10001   case ISD::SETOLE:
10002   case ISD::SETLT:
10003   case ISD::SETLE:
10004   case ISD::SETULT:
10005   case ISD::SETULE: {
    // Since the operands are already known never to be NaN here, either
    // fminnum or fminnum_ieee is OK. Try the ieee version first, since fminnum
    // is expanded in terms of it.
10009     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
10010     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
10011       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
10012 
10013     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
10014     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
10015       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
10016     return SDValue();
10017   }
10018   case ISD::SETOGT:
10019   case ISD::SETOGE:
10020   case ISD::SETGT:
10021   case ISD::SETGE:
10022   case ISD::SETUGT:
10023   case ISD::SETUGE: {
10024     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
10025     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
10026       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
10027 
10028     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
10029     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
10030       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
10031     return SDValue();
10032   }
10033   default:
10034     return SDValue();
10035   }
10036 }
10037 
10038 /// If a (v)select has a condition value that is a sign-bit test, try to smear
10039 /// the condition operand sign-bit across the value width and use it as a mask.
10040 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
10041   SDValue Cond = N->getOperand(0);
10042   SDValue C1 = N->getOperand(1);
10043   SDValue C2 = N->getOperand(2);
10044   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
10045     return SDValue();
10046 
10047   EVT VT = N->getValueType(0);
10048   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
10049       VT != Cond.getOperand(0).getValueType())
10050     return SDValue();
10051 
10052   // The inverted-condition + commuted-select variants of these patterns are
10053   // canonicalized to these forms in IR.
10054   SDValue X = Cond.getOperand(0);
10055   SDValue CondC = Cond.getOperand(1);
10056   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
10057   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
10058       isAllOnesOrAllOnesSplat(C2)) {
10059     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
10060     SDLoc DL(N);
10061     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
10062     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
10063     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
10064   }
10065   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
10066     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
10067     SDLoc DL(N);
10068     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
10069     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
10070     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
10071   }
10072   return SDValue();
10073 }
10074 
10075 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
10076   SDValue Cond = N->getOperand(0);
10077   SDValue N1 = N->getOperand(1);
10078   SDValue N2 = N->getOperand(2);
10079   EVT VT = N->getValueType(0);
10080   EVT CondVT = Cond.getValueType();
10081   SDLoc DL(N);
10082 
10083   if (!VT.isInteger())
10084     return SDValue();
10085 
10086   auto *C1 = dyn_cast<ConstantSDNode>(N1);
10087   auto *C2 = dyn_cast<ConstantSDNode>(N2);
10088   if (!C1 || !C2)
10089     return SDValue();
10090 
10091   // Only do this before legalization to avoid conflicting with target-specific
10092   // transforms in the other direction (create a select from a zext/sext). There
10093   // is also a target-independent combine here in DAGCombiner in the other
10094   // direction for (select Cond, -1, 0) when the condition is not i1.
10095   if (CondVT == MVT::i1 && !LegalOperations) {
10096     if (C1->isZero() && C2->isOne()) {
10097       // select Cond, 0, 1 --> zext (!Cond)
10098       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10099       if (VT != MVT::i1)
10100         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
10101       return NotCond;
10102     }
10103     if (C1->isZero() && C2->isAllOnes()) {
10104       // select Cond, 0, -1 --> sext (!Cond)
10105       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
10106       if (VT != MVT::i1)
10107         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
10108       return NotCond;
10109     }
10110     if (C1->isOne() && C2->isZero()) {
10111       // select Cond, 1, 0 --> zext (Cond)
10112       if (VT != MVT::i1)
10113         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10114       return Cond;
10115     }
10116     if (C1->isAllOnes() && C2->isZero()) {
10117       // select Cond, -1, 0 --> sext (Cond)
10118       if (VT != MVT::i1)
10119         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
10120       return Cond;
10121     }
10122 
10123     // Use a target hook because some targets may prefer to transform in the
10124     // other direction.
10125     if (TLI.convertSelectOfConstantsToMath(VT)) {
10126       // For any constants that differ by 1, we can transform the select into an
10127       // extend and add.
10128       const APInt &C1Val = C1->getAPIntValue();
10129       const APInt &C2Val = C2->getAPIntValue();
10130       if (C1Val - 1 == C2Val) {
10131         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
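        // Illustrative example: select i1 c, 5, 4 becomes add (zext c), 4.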
10132         if (VT != MVT::i1)
10133           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10134         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10135       }
10136       if (C1Val + 1 == C2Val) {
10137         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
10138         if (VT != MVT::i1)
10139           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
10140         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
10141       }
10142 
10143       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
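      // Illustrative example: select i1 c, 8, 0 becomes (zext c) << 3.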
10144       if (C1Val.isPowerOf2() && C2Val.isZero()) {
10145         if (VT != MVT::i1)
10146           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
10147         SDValue ShAmtC =
10148             DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
10149         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
10150       }
10151 
10152       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10153         return V;
10154     }
10155 
10156     return SDValue();
10157   }
10158 
10159   // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer-based booleans have different contents
  // from floating-point-based booleans. This is because we can't tell whether we
10162   // have an integer-based boolean or a floating-point-based boolean unless we
10163   // can find the SETCC that produced it and inspect its operands. This is
10164   // fairly easy if C is the SETCC node, but it can potentially be
10165   // undiscoverable (or not reasonably discoverable). For example, it could be
10166   // in another basic block or it could require searching a complicated
10167   // expression.
10168   if (CondVT.isInteger() &&
10169       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
10170           TargetLowering::ZeroOrOneBooleanContent &&
10171       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
10172           TargetLowering::ZeroOrOneBooleanContent &&
10173       C1->isZero() && C2->isOne()) {
10174     SDValue NotCond =
10175         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
10176     if (VT.bitsEq(CondVT))
10177       return NotCond;
10178     return DAG.getZExtOrTrunc(NotCond, DL, VT);
10179   }
10180 
10181   return SDValue();
10182 }
10183 
10184 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
10185   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
10186          "Expected a (v)select");
10187   SDValue Cond = N->getOperand(0);
10188   SDValue T = N->getOperand(1), F = N->getOperand(2);
10189   EVT VT = N->getValueType(0);
10190   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
10191     return SDValue();
10192 
10193   // select Cond, Cond, F --> or Cond, F
10194   // select Cond, 1, F    --> or Cond, F
10195   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
10196     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
10197 
10198   // select Cond, T, Cond --> and Cond, T
10199   // select Cond, T, 0    --> and Cond, T
10200   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
10201     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
10202 
10203   // select Cond, T, 1 --> or (not Cond), T
10204   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
10205     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10206     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
10207   }
10208 
10209   // select Cond, 0, F --> and (not Cond), F
10210   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
10211     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
10212     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
10213   }
10214 
10215   return SDValue();
10216 }
10217 
10218 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
10219   SDValue N0 = N->getOperand(0);
10220   SDValue N1 = N->getOperand(1);
10221   SDValue N2 = N->getOperand(2);
10222   EVT VT = N->getValueType(0);
10223   if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
10224     return SDValue();
10225 
10226   SDValue Cond0 = N0.getOperand(0);
10227   SDValue Cond1 = N0.getOperand(1);
10228   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10229   if (VT != Cond0.getValueType())
10230     return SDValue();
10231 
10232   // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
10233   // compare is inverted from that pattern ("Cond0 s> -1").
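  // Illustrative example: with i32 elements, (Cond0 s>> 31) is all-ones in
  // lanes where Cond0 is negative and zero elsewhere, so the AND/OR forms
  // below select per lane without a real select.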
10234   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
10235     ; // This is the pattern we are looking for.
10236   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
10237     std::swap(N1, N2);
10238   else
10239     return SDValue();
10240 
10241   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
10242   if (isNullOrNullSplat(N2)) {
10243     SDLoc DL(N);
10244     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10245     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10246     return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
10247   }
10248 
10249   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
10250   if (isAllOnesOrAllOnesSplat(N1)) {
10251     SDLoc DL(N);
10252     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10253     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10254     return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
10255   }
10256 
10257   // If we have to invert the sign bit mask, only do that transform if the
10258   // target has a bitwise 'and not' instruction (the invert is free).
  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
10260   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10261   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
10262     SDLoc DL(N);
10263     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
10264     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
10265     SDValue Not = DAG.getNOT(DL, Sra, VT);
10266     return DAG.getNode(ISD::AND, DL, VT, Not, N2);
10267   }
10268 
10269   // TODO: There's another pattern in this family, but it may require
10270   //       implementing hasOrNot() to check for profitability:
10271   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
10272 
10273   return SDValue();
10274 }
10275 
10276 SDValue DAGCombiner::visitSELECT(SDNode *N) {
10277   SDValue N0 = N->getOperand(0);
10278   SDValue N1 = N->getOperand(1);
10279   SDValue N2 = N->getOperand(2);
10280   EVT VT = N->getValueType(0);
10281   EVT VT0 = N0.getValueType();
10282   SDLoc DL(N);
10283   SDNodeFlags Flags = N->getFlags();
10284 
10285   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10286     return V;
10287 
10288   if (SDValue V = foldSelectOfConstants(N))
10289     return V;
10290 
10291   if (SDValue V = foldBoolSelectToLogic(N, DAG))
10292     return V;
10293 
10294   // If we can fold this based on the true/false value, do so.
10295   if (SimplifySelectOps(N, N1, N2))
10296     return SDValue(N, 0); // Don't revisit N.
10297 
10298   if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if the inner select already exists in the DAG, and
    // we always transform to the left-hand form if we know that the combined
    // condition can be further optimized.
10307     bool normalizeToSequence =
10308         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
10309     // select (and Cond0, Cond1), X, Y
10310     //   -> select Cond0, (select Cond1, X, Y), Y
10311     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
10312       SDValue Cond0 = N0->getOperand(0);
10313       SDValue Cond1 = N0->getOperand(1);
10314       SDValue InnerSelect =
10315           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
10316       if (normalizeToSequence || !InnerSelect.use_empty())
10317         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
10318                            InnerSelect, N2, Flags);
10319       // Cleanup on failure.
10320       if (InnerSelect.use_empty())
10321         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10322     }
10323     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
10324     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
10325       SDValue Cond0 = N0->getOperand(0);
10326       SDValue Cond1 = N0->getOperand(1);
10327       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
10328                                         Cond1, N1, N2, Flags);
10329       if (normalizeToSequence || !InnerSelect.use_empty())
10330         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
10331                            InnerSelect, Flags);
10332       // Cleanup on failure.
10333       if (InnerSelect.use_empty())
10334         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
10335     }
10336 
10337     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
10338     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
10339       SDValue N1_0 = N1->getOperand(0);
10340       SDValue N1_1 = N1->getOperand(1);
10341       SDValue N1_2 = N1->getOperand(2);
10342       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
10343         // Create the actual and node if we can generate good code for it.
10344         if (!normalizeToSequence) {
10345           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
10346           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
10347                              N2, Flags);
10348         }
10349         // Otherwise see if we can optimize the "and" to a better pattern.
10350         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
10351           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
10352                              N2, Flags);
10353         }
10354       }
10355     }
10356     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
10357     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
10358       SDValue N2_0 = N2->getOperand(0);
10359       SDValue N2_1 = N2->getOperand(1);
10360       SDValue N2_2 = N2->getOperand(2);
10361       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
10362         // Create the actual or node if we can generate good code for it.
10363         if (!normalizeToSequence) {
10364           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
10365           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
10366                              N2_2, Flags);
10367         }
10368         // Otherwise see if we can optimize to a better pattern.
10369         if (SDValue Combined = visitORLike(N0, N2_0, N))
10370           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
10371                              N2_2, Flags);
10372       }
10373     }
10374   }
10375 
10376   // select (not Cond), N1, N2 -> select Cond, N2, N1
10377   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
10378     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
10379     SelectOp->setFlags(Flags);
10380     return SelectOp;
10381   }
10382 
10383   // Fold selects based on a setcc into other things, such as min/max/abs.
10384   if (N0.getOpcode() == ISD::SETCC) {
10385     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
10386     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10387 
10388     // select (fcmp lt x, y), x, y -> fminnum x, y
10389     // select (fcmp gt x, y), x, y -> fmaxnum x, y
10390     //
10391     // This is OK if we don't care what happens if either operand is a NaN.
10392     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
10393       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
10394                                                 CC, TLI, DAG))
10395         return FMinMax;
10396 
10397     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
10398     // This is conservatively limited to pre-legal-operations to give targets
10399     // a chance to reverse the transform if they want to do that. Also, it is
10400     // unlikely that the pattern would be formed late, so it's probably not
10401     // worth going through the other checks.
10402     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
10403         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
10404         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
10405       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
10406       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
10407       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
10408         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
10409         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
10410         //
10411         // The IR equivalent of this transform would have this form:
10412         //   %a = add %x, C
10413         //   %c = icmp ugt %x, ~C
10414         //   %r = select %c, -1, %a
10415         //   =>
10416         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
10417         //   %u0 = extractvalue %u, 0
10418         //   %u1 = extractvalue %u, 1
10419         //   %r = select %u1, -1, %u0
10420         SDVTList VTs = DAG.getVTList(VT, VT0);
10421         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
10422         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
10423       }
10424     }
10425 
10426     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
10427         (!LegalOperations &&
10428          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc, as
      // they migrated from the fcmp.
10431       Flags = N0->getFlags();
10432       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
10433                                        N2, N0.getOperand(2));
10434       SelectNode->setFlags(Flags);
10435       return SelectNode;
10436     }
10437 
10438     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
10439       return NewSel;
10440   }
10441 
10442   if (!VT.isVector())
10443     if (SDValue BinOp = foldSelectOfBinops(N))
10444       return BinOp;
10445 
10446   return SDValue();
10447 }
10448 
10449 // This function assumes all the vselect's arguments are CONCAT_VECTOR
10450 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
10451 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
10452   SDLoc DL(N);
10453   SDValue Cond = N->getOperand(0);
10454   SDValue LHS = N->getOperand(1);
10455   SDValue RHS = N->getOperand(2);
10456   EVT VT = N->getValueType(0);
10457   int NumElems = VT.getVectorNumElements();
10458   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
10459          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
10460          Cond.getOpcode() == ISD::BUILD_VECTOR);
10461 
10462   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
10463   // binary ones here.
10464   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
10465     return SDValue();
10466 
  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF.
  // After we find a non-UNDEF element, keep looping until we get to half the
  // length of the BV and check that all the non-undef elements are the same.
10472   ConstantSDNode *BottomHalf = nullptr;
10473   for (int i = 0; i < NumElems / 2; ++i) {
10474     if (Cond->getOperand(i)->isUndef())
10475       continue;
10476 
10477     if (BottomHalf == nullptr)
10478       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10479     else if (Cond->getOperand(i).getNode() != BottomHalf)
10480       return SDValue();
10481   }
10482 
10483   // Do the same for the second half of the BuildVector
10484   ConstantSDNode *TopHalf = nullptr;
10485   for (int i = NumElems / 2; i < NumElems; ++i) {
10486     if (Cond->getOperand(i)->isUndef())
10487       continue;
10488 
10489     if (TopHalf == nullptr)
10490       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
10491     else if (Cond->getOperand(i).getNode() != TopHalf)
10492       return SDValue();
10493   }
10494 
10495   assert(TopHalf && BottomHalf &&
10496          "One half of the selector was all UNDEFs and the other was all the "
10497          "same value. This should have been addressed before this function.");
10498   return DAG.getNode(
10499       ISD::CONCAT_VECTORS, DL, VT,
10500       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
10501       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
10502 }
10503 
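// Try to fold a gather/scatter whose base pointer is zero and whose index is
// (add (splat X), Y) into one that uses X as the uniform base and Y as the
// index, e.g.:
//   base = 0, index = (add (splat %p), %off) --> base = %p, index = %off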
10504 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
10505                        SelectionDAG &DAG) {
10506   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
10507     return false;
10508 
10509   // Only perform the transformation when existing operands can be reused.
10510   if (IndexIsScaled)
10511     return false;
10512 
10513   // For now we check only the LHS of the add.
10514   SDValue LHS = Index.getOperand(0);
10515   SDValue SplatVal = DAG.getSplatValue(LHS);
10516   if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType())
10517     return false;
10518 
10519   BasePtr = SplatVal;
10520   Index = Index.getOperand(1);
10521   return true;
10522 }
10523 
10524 // Fold sext/zext of index into index type.
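// For example, if the target reports that the extend is redundant for its
// gather/scatter addressing:
//   index = (zext %idx) --> index = %idx (with an unsigned index type)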
10525 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
10526                      SelectionDAG &DAG) {
10527   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10528 
10529   // It's always safe to look through zero extends.
10530   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
10531     SDValue Op = Index.getOperand(0);
10532     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
10533       IndexType = ISD::UNSIGNED_SCALED;
10534       Index = Op;
10535       return true;
10536     }
10537     if (ISD::isIndexTypeSigned(IndexType)) {
10538       IndexType = ISD::UNSIGNED_SCALED;
10539       return true;
10540     }
10541   }
10542 
10543   // It's only safe to look through sign extends when Index is signed.
10544   if (Index.getOpcode() == ISD::SIGN_EXTEND &&
10545       ISD::isIndexTypeSigned(IndexType)) {
10546     SDValue Op = Index.getOperand(0);
10547     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
10548       Index = Op;
10549       return true;
10550     }
10551   }
10552 
10553   return false;
10554 }
10555 
10556 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
10557   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
10558   SDValue Mask = MSC->getMask();
10559   SDValue Chain = MSC->getChain();
10560   SDValue Index = MSC->getIndex();
10561   SDValue Scale = MSC->getScale();
10562   SDValue StoreVal = MSC->getValue();
10563   SDValue BasePtr = MSC->getBasePtr();
10564   ISD::MemIndexType IndexType = MSC->getIndexType();
10565   SDLoc DL(N);
10566 
10567   // Zap scatters with a zero mask.
10568   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10569     return Chain;
10570 
10571   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) {
10572     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10573     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
10574                                 DL, Ops, MSC->getMemOperand(), IndexType,
10575                                 MSC->isTruncatingStore());
10576   }
10577 
10578   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
10579     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
10580     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
10581                                 DL, Ops, MSC->getMemOperand(), IndexType,
10582                                 MSC->isTruncatingStore());
10583   }
10584 
10585   return SDValue();
10586 }
10587 
10588 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
10589   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
10590   SDValue Mask = MST->getMask();
10591   SDValue Chain = MST->getChain();
10592   SDValue Value = MST->getValue();
10593   SDValue Ptr = MST->getBasePtr();
10594   SDLoc DL(N);
10595 
10596   // Zap masked stores with a zero mask.
10597   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10598     return Chain;
10599 
  // If this is a masked store with an all ones mask, we can use an unmasked
  // store.
10601   // FIXME: Can we do this for indexed, compressing, or truncating stores?
10602   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
10603       !MST->isCompressingStore() && !MST->isTruncatingStore())
10604     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
10605                         MST->getBasePtr(), MST->getPointerInfo(),
10606                         MST->getOriginalAlign(), MachineMemOperand::MOStore,
10607                         MST->getAAInfo());
10608 
10609   // Try transforming N to an indexed store.
10610   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10611     return SDValue(N, 0);
10612 
10613   if (MST->isTruncatingStore() && MST->isUnindexed() &&
10614       Value.getValueType().isInteger() &&
10615       (!isa<ConstantSDNode>(Value) ||
10616        !cast<ConstantSDNode>(Value)->isOpaque())) {
10617     APInt TruncDemandedBits =
10618         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
10619                              MST->getMemoryVT().getScalarSizeInBits());
10620 
10621     // See if we can simplify the operation with
10622     // SimplifyDemandedBits, which only works if the value has a single use.
10623     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will
      // add Value's node back to the worklist if necessary, but we also need
      // to re-visit the Store node itself.
10628       if (N->getOpcode() != ISD::DELETED_NODE)
10629         AddToWorklist(N);
10630       return SDValue(N, 0);
10631     }
10632   }
10633 
10634   // If this is a TRUNC followed by a masked store, fold this into a masked
10635   // truncating store.  We can do this even if this is already a masked
10636   // truncstore.
10637   if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
10638       MST->isUnindexed() &&
10639       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
10640                                MST->getMemoryVT(), LegalOperations)) {
10641     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
10642                                          Value.getOperand(0).getValueType());
10643     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
10644                               MST->getOffset(), Mask, MST->getMemoryVT(),
10645                               MST->getMemOperand(), MST->getAddressingMode(),
10646                               /*IsTruncating=*/true);
10647   }
10648 
10649   return SDValue();
10650 }
10651 
10652 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
10653   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
10654   SDValue Mask = MGT->getMask();
10655   SDValue Chain = MGT->getChain();
10656   SDValue Index = MGT->getIndex();
10657   SDValue Scale = MGT->getScale();
10658   SDValue PassThru = MGT->getPassThru();
10659   SDValue BasePtr = MGT->getBasePtr();
10660   ISD::MemIndexType IndexType = MGT->getIndexType();
10661   SDLoc DL(N);
10662 
10663   // Zap gathers with a zero mask.
10664   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10665     return CombineTo(N, PassThru, MGT->getChain());
10666 
10667   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) {
10668     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10669     return DAG.getMaskedGather(
10670         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
10671         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
10672   }
10673 
10674   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
10675     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
10676     return DAG.getMaskedGather(
10677         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
10678         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
10679   }
10680 
10681   return SDValue();
10682 }
10683 
10684 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
10685   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
10686   SDValue Mask = MLD->getMask();
10687   SDLoc DL(N);
10688 
10689   // Zap masked loads with a zero mask.
10690   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
10691     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
10692 
  // If this is a masked load with an all ones mask, we can use an unmasked
  // load.
10694   // FIXME: Can we do this for indexed, expanding, or extending loads?
10695   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
10696       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
10697     SDValue NewLd = DAG.getLoad(
10698         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
10699         MLD->getPointerInfo(), MLD->getOriginalAlign(),
10700         MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
10701     return CombineTo(N, NewLd, NewLd.getValue(1));
10702   }
10703 
10704   // Try transforming N to an indexed load.
10705   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10706     return SDValue(N, 0);
10707 
10708   return SDValue();
10709 }
10710 
10711 /// A vector select of 2 constant vectors can be simplified to math/logic to
10712 /// avoid a variable select instruction and possibly avoid constant loads.
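/// For example:
///   vselect Cond, <2,2>, <1,1> --> add (zext Cond), <1,1>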
10713 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
10714   SDValue Cond = N->getOperand(0);
10715   SDValue N1 = N->getOperand(1);
10716   SDValue N2 = N->getOperand(2);
10717   EVT VT = N->getValueType(0);
10718   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
10719       !TLI.convertSelectOfConstantsToMath(VT) ||
10720       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
10721       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
10722     return SDValue();
10723 
10724   // Check if we can use the condition value to increment/decrement a single
10725   // constant value. This simplifies a select to an add and removes a constant
10726   // load/materialization from the general case.
10727   bool AllAddOne = true;
10728   bool AllSubOne = true;
10729   unsigned Elts = VT.getVectorNumElements();
10730   for (unsigned i = 0; i != Elts; ++i) {
10731     SDValue N1Elt = N1.getOperand(i);
10732     SDValue N2Elt = N2.getOperand(i);
10733     if (N1Elt.isUndef() || N2Elt.isUndef())
10734       continue;
10735     if (N1Elt.getValueType() != N2Elt.getValueType())
10736       continue;
10737 
10738     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
10739     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
10740     if (C1 != C2 + 1)
10741       AllAddOne = false;
10742     if (C1 != C2 - 1)
10743       AllSubOne = false;
10744   }
10745 
10746   // Further simplifications for the extra-special cases where the constants are
10747   // all 0 or all -1 should be implemented as folds of these patterns.
10748   SDLoc DL(N);
10749   if (AllAddOne || AllSubOne) {
10750     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
10751     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
10752     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
10753     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
10754     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
10755   }
10756 
10757   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
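  // For example:
  //   vselect Cond, <8,8>, <0,0> --> shl (zext Cond), <3,3>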
10758   APInt Pow2C;
10759   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
10760       isNullOrNullSplat(N2)) {
10761     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
10762     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
10763     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
10764   }
10765 
10766   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
10767     return V;
10768 
10769   // The general case for select-of-constants:
10770   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
10771   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
10772   // leave that to a machine-specific pass.
10773   return SDValue();
10774 }
10775 
10776 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
10777   SDValue N0 = N->getOperand(0);
10778   SDValue N1 = N->getOperand(1);
10779   SDValue N2 = N->getOperand(2);
10780   EVT VT = N->getValueType(0);
10781   SDLoc DL(N);
10782 
10783   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
10784     return V;
10785 
10786   if (SDValue V = foldBoolSelectToLogic(N, DAG))
10787     return V;
10788 
10789   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
10790   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
10791     return DAG.getSelect(DL, VT, F, N2, N1);
10792 
10793   // Canonicalize integer abs.
10794   // vselect (setg[te] X,  0),  X, -X ->
10795   // vselect (setgt    X, -1),  X, -X ->
10796   // vselect (setl[te] X,  0), -X,  X ->
10797   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
10798   if (N0.getOpcode() == ISD::SETCC) {
10799     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
10800     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10801     bool isAbs = false;
10802     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
10803 
10804     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
10805          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
10806         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
10807       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
10808     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
10809              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
10810       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
10811 
10812     if (isAbs) {
10813       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
10814         return DAG.getNode(ISD::ABS, DL, VT, LHS);
10815 
10816       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
10817                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
10818                                                   DL, getShiftAmountTy(VT)));
10819       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
10820       AddToWorklist(Shift.getNode());
10821       AddToWorklist(Add.getNode());
10822       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
10823     }
10824 
10825     // vselect x, y (fcmp lt x, y) -> fminnum x, y
10826     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
10827     //
10828     // This is OK if we don't care about what happens if either operand is a
10829     // NaN.
10830     //
10831     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
10832       if (SDValue FMinMax =
10833               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
10834         return FMinMax;
10835     }
10836 
10837     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10838       return S;
10839     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
10840       return S;
10841 
10842     // If this select has a condition (setcc) with narrower operands than the
10843     // select, try to widen the compare to match the select width.
10844     // TODO: This should be extended to handle any constant.
10845     // TODO: This could be extended to handle non-loading patterns, but that
10846     //       requires thorough testing to avoid regressions.
10847     if (isNullOrNullSplat(RHS)) {
10848       EVT NarrowVT = LHS.getValueType();
10849       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
10850       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
10851       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
10852       unsigned WideWidth = WideVT.getScalarSizeInBits();
10853       bool IsSigned = isSignedIntSetCC(CC);
10854       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10855       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
10856           SetCCWidth != 1 && SetCCWidth < WideWidth &&
10857           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
10858           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
10859         // Both compare operands can be widened for free. The LHS can use an
10860         // extended load, and the RHS is a constant:
10861         //   vselect (ext (setcc load(X), C)), N1, N2 -->
10862         //   vselect (setcc extload(X), C'), N1, N2
10863         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
10864         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
10865         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
10866         EVT WideSetCCVT = getSetCCResultType(WideVT);
10867         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
10868         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
10869       }
10870     }
10871 
10872     // Match VSELECTs into add with unsigned saturation.
10873     if (hasOperation(ISD::UADDSAT, VT)) {
10874       // Check if one of the arms of the VSELECT is vector with all bits set.
10875       // If it's on the left side invert the predicate to simplify logic below.
10876       SDValue Other;
10877       ISD::CondCode SatCC = CC;
10878       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
10879         Other = N2;
10880         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10881       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
10882         Other = N1;
10883       }
10884 
10885       if (Other && Other.getOpcode() == ISD::ADD) {
10886         SDValue CondLHS = LHS, CondRHS = RHS;
10887         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10888 
10889         // Canonicalize condition operands.
10890         if (SatCC == ISD::SETUGE) {
10891           std::swap(CondLHS, CondRHS);
10892           SatCC = ISD::SETULE;
10893         }
10894 
10895         // We can test against either of the addition operands.
10896         // x <= x+y ? x+y : ~0 --> uaddsat x, y
10897         // x+y >= x ? x+y : ~0 --> uaddsat x, y
10898         if (SatCC == ISD::SETULE && Other == CondRHS &&
10899             (OpLHS == CondLHS || OpRHS == CondLHS))
10900           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10901 
10902         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
10903             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10904              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
10905             CondLHS == OpLHS) {
10906           // If the RHS is a constant we have to reverse the const
10907           // canonicalization.
10908           // x >= ~C ? x+C : ~0 --> uaddsat x, C
10909           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10910             return Cond->getAPIntValue() == ~Op->getAPIntValue();
10911           };
10912           if (SatCC == ISD::SETULE &&
10913               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
10914             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
10915         }
10916       }
10917     }
10918 
10919     // Match VSELECTs into sub with unsigned saturation.
10920     if (hasOperation(ISD::USUBSAT, VT)) {
10921       // Check if one of the arms of the VSELECT is a zero vector. If it's on
10922       // the left side invert the predicate to simplify logic below.
10923       SDValue Other;
10924       ISD::CondCode SatCC = CC;
10925       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
10926         Other = N2;
10927         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
10928       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
10929         Other = N1;
10930       }
10931 
10932       // zext(x) >= y ? trunc(zext(x) - y) : 0
10933       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
10934       // zext(x) >  y ? trunc(zext(x) - y) : 0
10935       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
10936       if (Other && Other.getOpcode() == ISD::TRUNCATE &&
10937           Other.getOperand(0).getOpcode() == ISD::SUB &&
10938           (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
10939         SDValue OpLHS = Other.getOperand(0).getOperand(0);
10940         SDValue OpRHS = Other.getOperand(0).getOperand(1);
10941         if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
10942           if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
10943                                               DAG, DL))
10944             return R;
10945       }
10946 
10947       if (Other && Other.getNumOperands() == 2) {
10948         SDValue CondRHS = RHS;
10949         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
10950 
10951         if (OpLHS == LHS) {
10952           // Look for a general sub with unsigned saturation first.
10953           // x >= y ? x-y : 0 --> usubsat x, y
10954           // x >  y ? x-y : 0 --> usubsat x, y
10955           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
10956               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
10957             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10958 
10959           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
10960               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10961             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
10962                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
10963               // If the RHS is a constant we have to reverse the const
10964               // canonicalization.
              // x > C-1 ? x + (-C) : 0 --> usubsat x, C
10966               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
10967                 return (!Op && !Cond) ||
10968                        (Op && Cond &&
10969                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
10970               };
10971               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
10972                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
10973                                             /*AllowUndefs*/ true)) {
10974                 OpRHS = DAG.getNode(ISD::SUB, DL, VT,
10975                                     DAG.getConstant(0, DL, VT), OpRHS);
10976                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10977               }
10978 
10979               // Another special case: If C was a sign bit, the sub has been
10980               // canonicalized into a xor.
10981               // FIXME: Would it be better to use computeKnownBits to
10982               // determine whether it's safe to decanonicalize the xor?
10983               // x s< 0 ? x^C : 0 --> usubsat x, C
10984               APInt SplatValue;
10985               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
10986                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
10987                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
10988                   SplatValue.isSignMask()) {
10989                 // Note that we have to rebuild the RHS constant here to
10990                 // ensure we don't rely on particular values of undef lanes.
10991                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
10992                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
10993               }
10994             }
10995           }
10996         }
10997       }
10998     }
10999   }
11000 
11001   if (SimplifySelectOps(N, N1, N2))
11002     return SDValue(N, 0);  // Don't revisit N.
11003 
11004   // Fold (vselect all_ones, N1, N2) -> N1
11005   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
11006     return N1;
11007   // Fold (vselect all_zeros, N1, N2) -> N2
11008   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
11009     return N2;
11010 
  // The ConvertSelectToConcatVector function assumes both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
11014   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
11015       N2.getOpcode() == ISD::CONCAT_VECTORS &&
11016       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
11017     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
11018       return CV;
11019   }
11020 
11021   if (SDValue V = foldVSelectOfConstants(N))
11022     return V;
11023 
11024   if (hasOperation(ISD::SRA, VT))
11025     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
11026       return V;
11027 
11028   return SDValue();
11029 }
11030 
11031 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
11032   SDValue N0 = N->getOperand(0);
11033   SDValue N1 = N->getOperand(1);
11034   SDValue N2 = N->getOperand(2);
11035   SDValue N3 = N->getOperand(3);
11036   SDValue N4 = N->getOperand(4);
11037   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
11038 
11039   // fold select_cc lhs, rhs, x, x, cc -> x
11040   if (N2 == N3)
11041     return N2;
11042 
11043   // Determine if the condition we're dealing with is constant
11044   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
11045                                   CC, SDLoc(N), false)) {
11046     AddToWorklist(SCC.getNode());
11047 
11048     // cond always true -> true val
11049     // cond always false -> false val
11050     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
11051       return SCCC->isZero() ? N3 : N2;
11052 
    // When the condition is UNDEF, just return the first operand. This is
    // consistent with DAG creation: no setcc node is created in this case.
11055     if (SCC->isUndef())
11056       return N2;
11057 
11058     // Fold to a simpler select_cc
11059     if (SCC.getOpcode() == ISD::SETCC) {
11060       SDValue SelectOp = DAG.getNode(
11061           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
11062           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
11063       SelectOp->setFlags(SCC->getFlags());
11064       return SelectOp;
11065     }
11066   }
11067 
11068   // If we can fold this based on the true/false value, do so.
11069   if (SimplifySelectOps(N, N2, N3))
11070     return SDValue(N, 0);  // Don't revisit N.
11071 
11072   // fold select_cc into other things, such as min/max/abs
11073   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
11074 }
11075 
11076 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
11080   bool PreferSetCC =
11081       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
11082 
11083   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
11084   EVT VT = N->getValueType(0);
11085 
11086   //   SETCC(FREEZE(X), CONST, Cond)
11087   // =>
11088   //   FREEZE(SETCC(X, CONST, Cond))
11089   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
11090   // isn't equivalent to true or false.
11091   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
11092   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
11093   //
11094   // This transformation is beneficial because visitBRCOND can fold
11095   // BRCOND(FREEZE(X)) to BRCOND(X).
11096 
11097   // Conservatively optimize integer comparisons only.
11098   if (PreferSetCC) {
11099     // Do this only when SETCC is going to be used by BRCOND.
11100 
11101     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
11102     ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
11103     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
11104     bool Updated = false;
11105 
11106     // Is 'X Cond C' always true or false?
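    // For example, 'X u< 0' is always false and 'X u>= 0' is always true.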
11107     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
11108       bool False = (Cond == ISD::SETULT && C->isZero()) ||
11109                    (Cond == ISD::SETLT  && C->isMinSignedValue()) ||
11110                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
11111                    (Cond == ISD::SETGT  && C->isMaxSignedValue());
11112       bool True =  (Cond == ISD::SETULE && C->isAllOnes()) ||
11113                    (Cond == ISD::SETLE  && C->isMaxSignedValue()) ||
11114                    (Cond == ISD::SETUGE && C->isZero()) ||
11115                    (Cond == ISD::SETGE  && C->isMinSignedValue());
11116       return True || False;
11117     };
11118 
11119     if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
11120       if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
11121         N0 = N0->getOperand(0);
11122         Updated = true;
11123       }
11124     }
11125     if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
11126       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
11127                                N0C)) {
11128         N1 = N1->getOperand(0);
11129         Updated = true;
11130       }
11131     }
11132 
11133     if (Updated)
11134       return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
11135   }
11136 
11137   SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
11138                                    SDLoc(N), !PreferSetCC);
11139 
11140   if (!Combined)
11141     return SDValue();
11142 
11143   // If we prefer to have a setcc, and we don't, we'll try our best to
11144   // recreate one using rebuildSetCC.
11145   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
11146     SDValue NewSetCC = rebuildSetCC(Combined);
11147 
11148     // We don't have anything interesting to combine to.
11149     if (NewSetCC.getNode() == N)
11150       return SDValue();
11151 
11152     if (NewSetCC)
11153       return NewSetCC;
11154   }
11155 
11156   return Combined;
11157 }
11158 
11159 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
11160   SDValue LHS = N->getOperand(0);
11161   SDValue RHS = N->getOperand(1);
11162   SDValue Carry = N->getOperand(2);
11163   SDValue Cond = N->getOperand(3);
11164 
11165   // If Carry is false, fold to a regular SETCC.
11166   if (isNullConstant(Carry))
11167     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
11168 
11169   return SDValue();
11170 }
11171 
/// Check if N satisfies:
///   N is used once.
///   N is a Load.
///   The load is compatible with ExtOpcode, meaning:
///     If the load has an explicit zero/sign extension, ExtOpcode must have
///     the same extension type.
///     Otherwise, any ExtOpcode is compatible.
11179 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
11180   if (!N.hasOneUse())
11181     return false;
11182 
11183   if (!isa<LoadSDNode>(N))
11184     return false;
11185 
11186   LoadSDNode *Load = cast<LoadSDNode>(N);
11187   ISD::LoadExtType LoadExt = Load->getExtensionType();
11188   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
11189     return true;
11190 
  // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must have the same
  // extension type.
11193   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
11194       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
11195     return false;
11196 
11197   return true;
11198 }
11199 
11200 /// Fold
11201 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
11202 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
11203 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
11204 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11205 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11206 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
11207                                          SelectionDAG &DAG) {
11208   unsigned Opcode = N->getOpcode();
11209   SDValue N0 = N->getOperand(0);
11210   EVT VT = N->getValueType(0);
11211   SDLoc DL(N);
11212 
11213   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11214           Opcode == ISD::ANY_EXTEND) &&
11215          "Expected EXTEND dag node in input!");
11216 
11217   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
11218       !N0.hasOneUse())
11219     return SDValue();
11220 
11221   SDValue Op1 = N0->getOperand(1);
11222   SDValue Op2 = N0->getOperand(2);
11223   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
11224     return SDValue();
11225 
11226   auto ExtLoadOpcode = ISD::EXTLOAD;
11227   if (Opcode == ISD::SIGN_EXTEND)
11228     ExtLoadOpcode = ISD::SEXTLOAD;
11229   else if (Opcode == ISD::ZERO_EXTEND)
11230     ExtLoadOpcode = ISD::ZEXTLOAD;
11231 
11232   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
11233   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
11234   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
11235       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
11236     return SDValue();
11237 
11238   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
11239   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
11240   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
11241 }
11242 
11243 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
11244 /// a build_vector of constants.
11245 /// This function is called by the DAGCombiner when visiting sext/zext/aext
11246 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
11247 /// Vector extends are not folded if operations are legal; this is to
11248 /// avoid introducing illegal build_vector dag nodes.
11249 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
11250                                          SelectionDAG &DAG, bool LegalTypes) {
11251   unsigned Opcode = N->getOpcode();
11252   SDValue N0 = N->getOperand(0);
11253   EVT VT = N->getValueType(0);
11254   SDLoc DL(N);
11255 
11256   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
11257          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
11258          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
11259          && "Expected EXTEND dag node in input!");
11260 
11261   // fold (sext c1) -> c1
11262   // fold (zext c1) -> c1
11263   // fold (aext c1) -> c1
11264   if (isa<ConstantSDNode>(N0))
11265     return DAG.getNode(Opcode, DL, VT, N0);
11266 
11267   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11268   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
11269   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
11270   if (N0->getOpcode() == ISD::SELECT) {
11271     SDValue Op1 = N0->getOperand(1);
11272     SDValue Op2 = N0->getOperand(2);
11273     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
11274         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
11275       // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
11277       //
11278       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
11279       // t2: i64 = any_extend t1
11280       // -->
11281       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
11282       // -->
11283       // t4: i64 = sign_extend_inreg t3
11284       unsigned FoldOpc = Opcode;
11285       if (FoldOpc == ISD::ANY_EXTEND)
11286         FoldOpc = ISD::SIGN_EXTEND;
11287       return DAG.getSelect(DL, VT, N0->getOperand(0),
11288                            DAG.getNode(FoldOpc, DL, VT, Op1),
11289                            DAG.getNode(FoldOpc, DL, VT, Op2));
11290     }
11291   }
11292 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
11296   EVT SVT = VT.getScalarType();
11297   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
11298       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
11299     return SDValue();
11300 
11301   // We can fold this node into a build_vector.
11302   unsigned VTBits = SVT.getSizeInBits();
11303   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
11304   SmallVector<SDValue, 8> Elts;
11305   unsigned NumElts = VT.getVectorNumElements();
11306 
  // For zero-extensions, UNDEF elements are still guaranteed to have the
  // upper bits set to zero.
11309   bool IsZext =
11310       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
11311 
11312   for (unsigned i = 0; i != NumElts; ++i) {
11313     SDValue Op = N0.getOperand(i);
11314     if (Op.isUndef()) {
11315       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
11316       continue;
11317     }
11318 
11319     SDLoc DL(Op);
11320     // Get the constant value and if needed trunc it to the size of the type.
11321     // Nodes like build_vector might have constants wider than the scalar type.
11322     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
11323     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
11324       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
11325     else
11326       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
11327   }
11328 
11329   return DAG.getBuildVector(VT, DL, Elts);
11330 }
11331 
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extensions are possible and the
// above-mentioned transformation is profitable.
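// For example, a (setcc (load x), Cst) user can be extended as well, since
// its constant operand can be extended for free.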
11336 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
11337                                     unsigned ExtOpc,
11338                                     SmallVectorImpl<SDNode *> &ExtendNodes,
11339                                     const TargetLowering &TLI) {
11340   bool HasCopyToRegUses = false;
11341   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
11342   for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
11343        ++UI) {
11344     SDNode *User = *UI;
11345     if (User == N)
11346       continue;
11347     if (UI.getUse().getResNo() != N0.getResNo())
11348       continue;
11349     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
11350     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
11351       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
11352       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
11353         // Sign bits will be lost after a zext.
11354         return false;
11355       bool Add = false;
11356       for (unsigned i = 0; i != 2; ++i) {
11357         SDValue UseOp = User->getOperand(i);
11358         if (UseOp == N0)
11359           continue;
11360         if (!isa<ConstantSDNode>(UseOp))
11361           return false;
11362         Add = true;
11363       }
11364       if (Add)
11365         ExtendNodes.push_back(User);
11366       continue;
11367     }
11368     // If truncates aren't free and there are users we can't
11369     // extend, it isn't worthwhile.
11370     if (!isTruncFree)
11371       return false;
11372     // Remember if this value is live-out.
11373     if (User->getOpcode() == ISD::CopyToReg)
11374       HasCopyToRegUses = true;
11375   }
11376 
11377   if (HasCopyToRegUses) {
11378     bool BothLiveOut = false;
11379     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
11380          UI != UE; ++UI) {
11381       SDUse &Use = UI.getUse();
11382       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
11383         BothLiveOut = true;
11384         break;
11385       }
11386     }
11387     if (BothLiveOut)
11388       // Both unextended and extended values are live out. There had better be
11389       // a good reason for the transformation.
      return !ExtendNodes.empty();
11391   }
11392   return true;
11393 }
11394 
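// Rewrite the SetCC uses collected by ExtendUsesToFormExtLoad so that they
// operate on the extended load value, extending their other operands to
// match.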
11395 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
11396                                   SDValue OrigLoad, SDValue ExtLoad,
11397                                   ISD::NodeType ExtType) {
11398   // Extend SetCC uses if necessary.
11399   SDLoc DL(ExtLoad);
11400   for (SDNode *SetCC : SetCCs) {
11401     SmallVector<SDValue, 4> Ops;
11402 
11403     for (unsigned j = 0; j != 2; ++j) {
11404       SDValue SOp = SetCC->getOperand(j);
11405       if (SOp == OrigLoad)
11406         Ops.push_back(ExtLoad);
11407       else
11408         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
11409     }
11410 
11411     Ops.push_back(SetCC->getOperand(2));
11412     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
11413   }
11414 }
11415 
11416 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
11417 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
11418   SDValue N0 = N->getOperand(0);
11419   EVT DstVT = N->getValueType(0);
11420   EVT SrcVT = N0.getValueType();
11421 
11422   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11423           N->getOpcode() == ISD::ZERO_EXTEND) &&
11424          "Unexpected node type (not an extend)!");
11425 
11426   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
11427   // For example, on a target with legal v4i32, but illegal v8i32, turn:
11428   //   (v8i32 (sext (v8i16 (load x))))
11429   // into:
11430   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
11431   //                          (v4i32 (sextload (x + 16)))))
11432   // Where uses of the original load, i.e.:
11433   //   (v8i16 (load x))
11434   // are replaced with:
11435   //   (v8i16 (truncate
11436   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
11437   //                            (v4i32 (sextload (x + 16)))))))
11438   //
11439   // This combine is only applicable to illegal, but splittable, vectors.
11440   // All legal types, and illegal non-vector types, are handled elsewhere.
11441   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
11442   //
11443   if (N0->getOpcode() != ISD::LOAD)
11444     return SDValue();
11445 
11446   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11447 
11448   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
11449       !N0.hasOneUse() || !LN0->isSimple() ||
11450       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
11451       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11452     return SDValue();
11453 
11454   SmallVector<SDNode *, 4> SetCCs;
11455   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
11456     return SDValue();
11457 
11458   ISD::LoadExtType ExtType =
11459       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
11460 
11461   // Try to split the vector types to get down to legal types.
11462   EVT SplitSrcVT = SrcVT;
11463   EVT SplitDstVT = DstVT;
11464   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
11465          SplitSrcVT.getVectorNumElements() > 1) {
11466     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
11467     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
11468   }
11469 
11470   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
11471     return SDValue();
11472 
11473   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
11474 
11475   SDLoc DL(N);
11476   const unsigned NumSplits =
11477       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
11478   const unsigned Stride = SplitSrcVT.getStoreSize();
11479   SmallVector<SDValue, 4> Loads;
11480   SmallVector<SDValue, 4> Chains;
11481 
11482   SDValue BasePtr = LN0->getBasePtr();
11483   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
11484     const unsigned Offset = Idx * Stride;
11485     const Align Align = commonAlignment(LN0->getAlign(), Offset);
11486 
11487     SDValue SplitLoad = DAG.getExtLoad(
11488         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
11489         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
11490         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11491 
11492     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
11493 
11494     Loads.push_back(SplitLoad.getValue(0));
11495     Chains.push_back(SplitLoad.getValue(1));
11496   }
11497 
11498   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11499   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
11500 
11501   // Simplify TF.
11502   AddToWorklist(NewChain.getNode());
11503 
11504   CombineTo(N, NewValue);
11505 
11506   // Replace uses of the original load (before extension)
11507   // with a truncate of the concatenated sextloaded vectors.
11508   SDValue Trunc =
11509       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
11510   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
11511   CombineTo(N0.getNode(), Trunc, NewChain);
11512   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11513 }
11514 
11515 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
11516 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
11517 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
11518   assert(N->getOpcode() == ISD::ZERO_EXTEND);
11519   EVT VT = N->getValueType(0);
11520   EVT OrigVT = N->getOperand(0).getValueType();
11521   if (TLI.isZExtFree(OrigVT, VT))
11522     return SDValue();
11523 
11524   // and/or/xor
11525   SDValue N0 = N->getOperand(0);
11526   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
11527         N0.getOpcode() == ISD::XOR) ||
11528       N0.getOperand(1).getOpcode() != ISD::Constant ||
11529       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
11530     return SDValue();
11531 
11532   // shl/shr
11533   SDValue N1 = N0->getOperand(0);
11534   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
11535       N1.getOperand(1).getOpcode() != ISD::Constant ||
11536       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
11537     return SDValue();
11538 
11539   // load
11540   if (!isa<LoadSDNode>(N1.getOperand(0)))
11541     return SDValue();
11542   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
11543   EVT MemVT = Load->getMemoryVT();
11544   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
11545       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
11550   // will be wrong.
11551   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
11552     return SDValue();
11553 
11554   if (!N0.hasOneUse() || !N1.hasOneUse())
11555     return SDValue();
11556 
11557   SmallVector<SDNode*, 4> SetCCs;
11558   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
11559                                ISD::ZERO_EXTEND, SetCCs, TLI))
11560     return SDValue();
11561 
11562   // Actually do the transformation.
11563   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
11564                                    Load->getChain(), Load->getBasePtr(),
11565                                    Load->getMemoryVT(), Load->getMemOperand());
11566 
11567   SDLoc DL1(N1);
11568   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
11569                               N1.getOperand(1));
11570 
11571   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11572   SDLoc DL0(N0);
11573   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
11574                             DAG.getConstant(Mask, DL0, VT));
11575 
11576   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
11577   CombineTo(N, And);
11578   if (SDValue(Load, 0).hasOneUse()) {
11579     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
11580   } else {
11581     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
11582                                 Load->getValueType(0), ExtLoad);
11583     CombineTo(Load, Trunc, ExtLoad.getValue(1));
11584   }
11585 
11586   // N0 is dead at this point.
11587   recursivelyDeleteUnusedNodes(N0.getNode());
11588 
11589   return SDValue(N,0); // Return N so it doesn't get rechecked!
11590 }
11591 
11592 /// If we're narrowing or widening the result of a vector select and the final
11593 /// size is the same size as a setcc (compare) feeding the select, then try to
11594 /// apply the cast operation to the select's operands because matching vector
11595 /// sizes for a select condition and other operands should be more efficient.
11596 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
11597   unsigned CastOpcode = Cast->getOpcode();
11598   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
11599           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
11600           CastOpcode == ISD::FP_ROUND) &&
11601          "Unexpected opcode for vector select narrowing/widening");
11602 
11603   // We only do this transform before legal ops because the pattern may be
11604   // obfuscated by target-specific operations after legalization. Do not create
11605   // an illegal select op, however, because that may be difficult to lower.
11606   EVT VT = Cast->getValueType(0);
11607   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
11608     return SDValue();
11609 
11610   SDValue VSel = Cast->getOperand(0);
11611   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
11612       VSel.getOperand(0).getOpcode() != ISD::SETCC)
11613     return SDValue();
11614 
11615   // Does the setcc have the same vector size as the casted select?
11616   SDValue SetCC = VSel.getOperand(0);
11617   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
11618   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
11619     return SDValue();
11620 
11621   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
11622   SDValue A = VSel.getOperand(1);
11623   SDValue B = VSel.getOperand(2);
11624   SDValue CastA, CastB;
11625   SDLoc DL(Cast);
11626   if (CastOpcode == ISD::FP_ROUND) {
11627     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
11628     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
11629     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
11630   } else {
11631     CastA = DAG.getNode(CastOpcode, DL, VT, A);
11632     CastB = DAG.getNode(CastOpcode, DL, VT, B);
11633   }
11634   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
11635 }
11636 
11637 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11638 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11639 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
11640                                      const TargetLowering &TLI, EVT VT,
11641                                      bool LegalOperations, SDNode *N,
11642                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
11643   SDNode *N0Node = N0.getNode();
11644   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
11645                                                    : ISD::isZEXTLoad(N0Node);
11646   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
11647       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
11648     return SDValue();
11649 
11650   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11651   EVT MemVT = LN0->getMemoryVT();
11652   if ((LegalOperations || !LN0->isSimple() ||
11653        VT.isVector()) &&
11654       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
11655     return SDValue();
11656 
11657   SDValue ExtLoad =
11658       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11659                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
11660   Combiner.CombineTo(N, ExtLoad);
11661   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11662   if (LN0->use_empty())
11663     Combiner.recursivelyDeleteUnusedNodes(LN0);
11664   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11665 }
11666 
11667 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
11668 // Only generate vector extloads when 1) they're legal, and 2) they are
11669 // deemed desirable by the target.
11670 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
11671                                   const TargetLowering &TLI, EVT VT,
11672                                   bool LegalOperations, SDNode *N, SDValue N0,
11673                                   ISD::LoadExtType ExtLoadType,
11674                                   ISD::NodeType ExtOpc) {
  // TODO: isFixedLengthVector() should be removed, with any negative effects
  // on code generation being handled by that target's implementation of
  // isVectorLoadExtDesirable().
11678   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
11679       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
11680       ((LegalOperations || VT.isFixedLengthVector() ||
11681         !cast<LoadSDNode>(N0)->isSimple()) &&
11682        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
11683     return {};
11684 
11685   bool DoXform = true;
11686   SmallVector<SDNode *, 4> SetCCs;
11687   if (!N0.hasOneUse())
11688     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
11689   if (VT.isVector())
11690     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
11691   if (!DoXform)
11692     return {};
11693 
11694   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11695   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
11696                                    LN0->getBasePtr(), N0.getValueType(),
11697                                    LN0->getMemOperand());
11698   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
11699   // If the load value is used only by N, replace it via CombineTo N.
11700   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
11701   Combiner.CombineTo(N, ExtLoad);
11702   if (NoReplaceTrunc) {
11703     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11704     Combiner.recursivelyDeleteUnusedNodes(LN0);
11705   } else {
11706     SDValue Trunc =
11707         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11708     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11709   }
11710   return SDValue(N, 0); // Return N so it doesn't get rechecked!
11711 }
11712 
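// fold ([s|z]ext (masked_load x)) -> (masked [s|z]extload x), extending the
// pass-through value to the new type as well.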
11713 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
11714                                         const TargetLowering &TLI, EVT VT,
11715                                         SDNode *N, SDValue N0,
11716                                         ISD::LoadExtType ExtLoadType,
11717                                         ISD::NodeType ExtOpc) {
11718   if (!N0.hasOneUse())
11719     return SDValue();
11720 
11721   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
11722   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
11723     return SDValue();
11724 
11725   if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
11726     return SDValue();
11727 
11728   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
11729     return SDValue();
11730 
11731   SDLoc dl(Ld);
11732   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
11733   SDValue NewLoad = DAG.getMaskedLoad(
11734       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
11735       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
11736       ExtLoadType, Ld->isExpandingLoad());
11737   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
11738   return NewLoad;
11739 }
11740 
11741 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
11742                                        bool LegalOperations) {
11743   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
11744           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
11745 
11746   SDValue SetCC = N->getOperand(0);
11747   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
11748       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
11749     return SDValue();
11750 
11751   SDValue X = SetCC.getOperand(0);
11752   SDValue Ones = SetCC.getOperand(1);
11753   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
11754   EVT VT = N->getValueType(0);
11755   EVT XVT = X.getValueType();
11756   // setge X, C is canonicalized to setgt, so we do not need to match that
11757   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
11758   // not require the 'not' op.
11759   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
11760     // Invert and smear/shift the sign bit:
11761     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
11762     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
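    // E.g. for i32: sext i1 (setgt i32 X, -1) --> sra (not X), 31, which is
    // all-ones when X is non-negative and zero when X is negative, matching
    // the sign-extended setcc result.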
    SDLoc DL(N);
    unsigned ShCt = VT.getSizeInBits() - 1;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
      SDValue NotX = DAG.getNOT(DL, X, VT);
      SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
      auto ShiftOpcode =
          N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
      return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
    }
  }
  return SDValue();
}

SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  if (N0.getOpcode() != ISD::SETCC)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT N00VT = N00.getValueType();
  SDLoc DL(N);

  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
  // the same size as the compared operands. Try to optimize sext(setcc())
  // if this is the case.
  if (VT.isVector() && !LegalOperations &&
      TLI.getBooleanContents(N00VT) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent) {
    EVT SVT = getSetCCResultType(N00VT);

    // If we already have the desired type, don't change it.
    if (SVT != N0.getValueType()) {
      // We know that the # of elements of the result is the same as the
      // # of elements of the compare (and the # of elements of the compare
      // result for that matter). Check to see that they are the same size.
      // If so, we know that the element size of the sext'd result matches
      // the element size of the compare operands.
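      // E.g. on a NEON-like target, sext (setcc v4i32, v4i32) to v4i32 can be
      // emitted directly as one v4i32 compare whose lanes are already 0/-1.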
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(DL, VT, N00, N01, CC);

      // If the desired elements are smaller or larger than the source
      // elements, we can use a matching integer vector type and then
      // truncate/sign extend.
      EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVecType) {
        SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
        return DAG.getSExtOrTrunc(VsetCC, DL, VT);
      }
    }

    // Try to eliminate the sext of a setcc by zexting the compare operands.
    if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
        !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
      bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
      unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

      // We have an unsupported narrow vector compare op that would be legal
      // if extended to the destination type. See if the compare operands
      // can be freely extended to the destination type.
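      // E.g. if a v8i8 setcc is illegal but a v8i16 setcc is legal, comparing
      // zero/sign-extended operands at v8i16 gives the same result, provided
      // the extension signedness matches the signedness of CC.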
      auto IsFreeToExtend = [&](SDValue V) {
        if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
          return true;
        // Match a simple, non-extended load that can be converted to a
        // legal {z/s}ext-load.
        // TODO: Allow widening of an existing {z/s}ext-load?
        if (!(ISD::isNON_EXTLoad(V.getNode()) &&
              ISD::isUNINDEXEDLoad(V.getNode()) &&
              cast<LoadSDNode>(V)->isSimple() &&
              TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
          return false;

        // Non-chain users of this value must either be the setcc in this
        // sequence or extends that can be folded into the new {z/s}ext-load.
        for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
             UI != UE; ++UI) {
          // Skip uses of the chain and the setcc.
          SDNode *User = *UI;
          if (UI.getUse().getResNo() != 0 || User == N0.getNode())
            continue;
          // Extra users must have exactly the same cast we are about to create.
          // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
          //       is enhanced similarly.
          if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
            return false;
        }
        return true;
      };

      if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
        SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
        SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
        return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
      }
    }
  }

  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
  // Here, T can be 1 or -1, depending on the type of the setcc and
  // getBooleanContents().
  unsigned SetCCWidth = N0.getScalarValueSizeInBits();

  // To determine the "true" side of the select, we need to know the high bit
  // of the value returned by the setcc if it evaluates to true.
  // If the type of the setcc is i1, then the true case of the select is just
  // sext(i1 1), that is, -1.
  // If the type of the setcc is larger (say, i8) then the value of the high
  // bit depends on getBooleanContents(), so ask TLI for a real "true" value
  // of the appropriate width.
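  // For instance, with an i1 setcc the select becomes
  // (select (setcc X, Y, cc), -1, 0), whereas a target using
  // ZeroOrOneBooleanContent for an i8 setcc would use 1 as the "true" value.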
  SDValue ExtTrueVal = (SetCCWidth == 1)
                           ? DAG.getAllOnesConstant(DL, VT)
                           : DAG.getBoolConstant(true, DL, VT, N00VT);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
    return SCC;

  if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
    EVT SetCCVT = getSetCCResultType(N00VT);
    // Don't do this transform for i1 because there's a select transform
    // that would reverse it.
    // TODO: We should not do this transform at all without a target hook
    // because a sext is likely cheaper than a select?
    if (SetCCVT.getScalarSizeInBits() != 1 &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
      SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
      return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // sext(undef) = 0 because the top bits will all be the same.
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already sign extended enough and can be used directly.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
                                   ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (SDValue V = foldSextSetcc(N))
    return V;

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
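  // This is safe because 0 - (zext i8 X) lies in [-255, 0], so the i32 value
  // is already sign extended and the subtraction can simply be widened.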
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
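  // Likewise safe: (zext i8 X) + (-1) lies in [-1, 254], so sign extending
  // the i32 sum gives the same value as doing the decrement in i64.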
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  // fold sext (not i1 X) -> add (zext i1 X), -1
  // TODO: This could be extended to handle bool vectors.
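  // Worked out per case: X == 0 gives sext(not X) = -1 and zext(X) - 1 = -1;
  // X == 1 gives sext(not X) = 0 and zext(X) - 1 = 0, so the forms agree.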
  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
      (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
                            TLI.isOperationLegal(ISD::ADD, VT)))) {
    // If we can eliminate the 'not', the sext form should be better.
    if (SDValue NewXor = visitXOR(N0.getNode())) {
      // Returning N0 is a form of in-visit replacement that may have
      // invalidated N0.
      if (NewXor.getNode() == N0.getNode()) {
        // Return SDValue here as the xor should have already been replaced in
        // this sext.
        return SDValue();
      }

      // Return a new sext with the new xor.
      return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
    }

    SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true,
// recording the value being truncated in Op and which of Op's bits are
// zero/one in Known. This function computes KnownBits to avoid a duplicated
// call to computeKnownBits in the caller.
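// Besides a plain TRUNCATE, this also recognizes (setne X, 0) where X is
// known to be 0 or 1; such a setcc is equivalent to truncating X to i1.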
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    Known = DAG.computeKnownBits(Op);
    return true;
  }

  if (N.getOpcode() != ISD::SETCC ||
      N.getValueType().getScalarType() != MVT::i1 ||
      cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullOrNullSplat(Op0))
    Op = Op1;
  else if (isNullOrNullSplat(Op1))
    Op = Op0;
  else
    return false;

  Known = DAG.computeKnownBits(Op);

  return (Known.Zero | 1).isAllOnes();
}

/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
          Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");

  SDValue CtPop = Extend->getOperand(0);
  if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
    return SDValue();

  EVT VT = Extend->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
      !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
    return SDValue();

  // zext (ctpop X) --> ctpop (zext X)
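  // Zero extending the input does not change the population count, so e.g.
  // on a target with only an i64 CTPOP, zext i32 (ctpop X) to i64 becomes
  // ctpop (zext X to i64).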
  SDLoc DL(Extend);
  SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
  return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // zext(undef) = 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
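  // E.g. (zext (trunc i32 X to i16) to i32) is just X when bits 16..31 of X
  // are known zero, and (zext ... to i64) becomes (zext X to i64) in that
  // case.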
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
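    // E.g. for (zext (trunc v4i32 X to v4i16) to v4i64), masking X with a
    // v4i32 splat of 0xFFFF needs one narrow AND, whereas masking after the
    // extension would need a v4i64 mask that may be split across registers.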
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
      // We may safely transfer the debug info describing the truncate node
      // over to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
          tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
                                   ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # of elements of the result is the same as the
      // # of elements of the compare (and the # of elements of the compare
      // result for that matter). Check to see that they are the same size.
      // If so, we know that the element size of the zext'd result matches
      // the element size of the compare operands.
      SDLoc DL(N);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend followed by zext_in_reg.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
                                    N0.getValueType());
    }

    // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
    SDLoc DL(N);
    EVT N0VT = N0.getValueType();
    EVT N00VT = N0.getOperand(0).getValueType();
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1),
            DAG.getBoolConstant(true, DL, N0VT, N00VT),
            DAG.getBoolConstant(false, DL, N0VT, N00VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
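      // E.g. (zext i8 X to i32) has 24 known-zero high bits, so a shl by at
      // most 24 cannot shift set bits out, and the outer zext can be widened
      // past the shift safely.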
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // aext(undef) = undef
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
    assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
    return DAG.getNode(ISD::AND, DL, VT, X, Y);
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction, so attempt to fold to zext instead.
  if (VT.isVector()) {
    // Try to simplify (zext (load x)).
    if (SDValue foldedExt =
            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                               ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
      return foldedExt;
  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
             ISD::isUNINDEXEDLoad(N0.getNode()) &&
             TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode *, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform =
          ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       N0.getValueType(), LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        SDValue Trunc =
            DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # of elements of the result is the same as the
      // # of elements of the compare (and the # of elements of the compare
      // result for that matter). Check to see that they are the same size.
      // If so, we know that the element size of the extended result matches
      // the element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting the smaller of the two asserted types on the larger source
    // value. This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDLoc DL(N);
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
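  // E.g. (AssertZext (trunc (AssertSext X, i16) to i32), i8)
  //   --> (trunc (AssertZext X, i8) to i32): if all bits above bit 7 of X are
  //   zero, X is trivially sign extended from bit 15 as well, so the
  //   AssertSext adds no information.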
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertSext &&
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
  SDLoc DL(N);

  Align AL = cast<AssertAlignSDNode>(N)->getAlign();
  SDValue N0 = N->getOperand(0);

  // Fold (assertalign (assertalign x, AL0), AL1) ->
  // (assertalign x, max(AL0, AL1))
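  // E.g. (assertalign (assertalign x, 4), 8) --> (assertalign x, 8), since
  // both alignment facts hold and the larger one subsumes the smaller.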
  if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
    return DAG.getAssertAlign(DL, N0.getOperand(0),
                              std::max(AL, AAN->getAlign()));

  // In rare cases, there are trivial arithmetic ops in source operands. Sink
  // this assert down to the source operands so that those arithmetic ops can
  // be exposed to DAG combining.
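  // E.g. for (assertalign (add X, 16), 16): the constant 16 already has four
  // trailing zero bits, so the alignment fact can be asserted on X instead
  // and the add re-created on top, exposing X's alignment to other folds.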
  switch (N0.getOpcode()) {
  default:
    break;
  case ISD::ADD:
  case ISD::SUB: {
    unsigned AlignShift = Log2(AL);
    SDValue LHS = N0.getOperand(0);
    SDValue RHS = N0.getOperand(1);
    unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
    unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
    if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
      if (LHSAlignShift < AlignShift)
        LHS = DAG.getAssertAlign(DL, LHS, AL);
      if (RHSAlignShift < AlignShift)
        RHS = DAG.getAssertAlign(DL, RHS, AL);
      return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
    }
    break;
  }
  }

  return SDValue();
}

/// If the result of a load is shifted/masked/truncated to an effectively
/// narrower type, try to transform the load to a narrower type and/or
/// use an extending load.
SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // The ShAmt variable is used to indicate that we've consumed a right
  // shift, i.e. we want to narrow the width of the load by skipping the
  // ShAmt least significant bits.
  unsigned ShAmt = 0;
  // A special case is when the least significant bits from the load are masked
  // away, but using an AND rather than a right shift. HasShiftedOffset is used
  // to indicate that the narrowed load should be left-shifted ShAmt bits to
  // get the result.
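  // E.g. (and (load i32 p), 0xFF00) is handled as a zextload i8 from p+1 (on
  // a little-endian target), with the loaded byte shifted left by 8 afterward.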
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
  // sign extending back to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
    // Another special-case: SRL/SRA is basically zero/sign-extending a
    // narrower value, or it may be shifting a higher subword, half or byte
    // into the lowest bits.

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(N0);
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!N1C || !LN)
      return SDValue();
    // If the shift amount is larger than the memory type then we're not
    // accessing any of the loaded bytes.
    ShAmt = N1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
    if (MemoryWidth <= ShAmt)
      return SDValue();
    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    // If the original load is a SEXTLOAD then we can't simply replace it by a
    // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
    // followed by a ZEXT, but that is not handled at the moment). Similarly if
    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
         LN->getExtensionType() == ISD::ZEXTLOAD) &&
        LN->getExtensionType() != ExtType)
      return SDValue();
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
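    // E.g. (and X, 0xFFFF) on an i32 value is equivalent to
    // (zext (trunc X to i16) to i32), so it can describe a zextload i16.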
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
      HasShiftedOffset = true;
    } else {
      return SDValue();
    }

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }

  // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
  // a right shift. Here we redo some of those checks, to possibly adjust the
  // ExtVT even further based on "a masking AND". We could also end up here for
  // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
  // need to be done here as well.
  if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
    SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
    // check below? And maybe it could be non-profitable to do the transform in
    // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
    if (!SRL.hasOneUse())
      return SDValue();

    // Only handle shift with constant shift amount, and the shiftee must be a
    // load.
    auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
    auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
    if (!SRL1C || !LN)
      return SDValue();

    // If the shift amount is larger than the input type then we're not
    // accessing any of the loaded bytes.  If the load was a zextload/extload
    // then the result of the shift+trunc is zero/undef (handled elsewhere).
    ShAmt = SRL1C->getZExtValue();
    uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
    if (ShAmt >= MemoryWidth)
      return SDValue();

    // Because a SRL must be assumed to *need* to zero-extend the high bits
    // (as opposed to anyext the high bits), we can't combine the zextload
    // lowering of SRL and an sextload.
    if (LN->getExtensionType() == ISD::SEXTLOAD)
      return SDValue();

    // Avoid reading outside the memory accessed by the original load (could
    // happen if we only adjusted the load base pointer by ShAmt). Instead we
    // try to narrow the load even further. The typical scenario here is:
    //   (i64 (truncate (i96 (srl (load x), 64)))) ->
    //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
    if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
      // Don't replace sextload by zextload.
      if (ExtType == ISD::SEXTLOAD)
        return SDValue();
      // Narrow the load.
      ExtType = ISD::ZEXTLOAD;
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    }

    // If the SRL is only used by a masking AND, we may be able to adjust
    // the ExtVT to make the AND redundant.
    SDNode *Mask = *(SRL->use_begin());
    if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
        isa<ConstantSDNode>(Mask->getOperand(1))) {
      const APInt &ShiftMask = Mask->getConstantOperandAPInt(1);
      if (ShiftMask.isMask()) {
        EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                         ShiftMask.countTrailingOnes());
        // If the mask is smaller, recompute the type.
        if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
            TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
          ExtVT = MaskedVT;
      }
    }

    N0 = SRL.getOperand(0);
  }

  // If the load is shifted left (and the result isn't shifted back right), we
  // can fold a truncate through the shift. The typical scenario is that N
  // points at a TRUNCATE here so the attempted fold is:
  //   (truncate (shl (load x), c))) -> (shl (narrow load x), c)
  // ShLeftAmt will indicate how much a narrowed load should be shifted left.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.
  if (!isa<LoadSDNode>(N0))
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Reducing the width of a volatile load is illegal.  For atomics, we may be
  // able to reduce the width provided we never widen again. (see D66309)
  if (!LN0->isSimple() ||
      !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
    return SDValue();

  auto AdjustBigEndianShift = [&](unsigned ShAmt) {
    unsigned LVTStoreBits =
        LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
    return LVTStoreBits - EVTStoreBits - ShAmt;
  };

  // We need to adjust the pointer to the load by ShAmt bits in order to load
  // the correct bytes.
  unsigned PtrAdjustmentInBits =
      DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;

  uint64_t PtrOff = PtrAdjustmentInBits / 8;
  Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
                                            TypeSize::Fixed(PtrOff), DL, Flags);
  AddToWorklist(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
                          NewAlign, LN0->getMemOperand()->getFlags(),
                          LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getScalarSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  if (HasShiftedOffset) {
    // We're using a shifted mask, so the load now has an offset. This means
    // that the data has been loaded into lower bytes than it would have been
    // without the offset, so we need to shl the loaded data into the correct
    // position in the register.
    SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
    Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT ExtVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned ExtVTBits = ExtVT.getScalarSizeInBits();

  // sext_in_reg(undef) = 0 because the top bits will all be the same.
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
                       N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough or if we know that x has more than 1 sign bit and the
  // sign_extend_inreg is extending from one of them.
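  // E.g. (sext_in_reg (aext i8 X to i32), i8) --> (sext i8 X to i32): the
  // any-extended high bits are exactly the ones the sext_in_reg overwrites.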
12925   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
12926     SDValue N00 = N0.getOperand(0);
12927     unsigned N00Bits = N00.getScalarValueSizeInBits();
12928     if ((N00Bits <= ExtVTBits ||
12929          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
12930         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
12931       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12932   }
12933 
12934   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
12935   // if x is small enough or if we know that x has more than 1 sign bit and the
12936   // sign_extend_inreg is extending from one of them.
12937   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
12938       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
12939       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
12940     SDValue N00 = N0.getOperand(0);
12941     unsigned N00Bits = N00.getScalarValueSizeInBits();
12942     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
12943     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
12944     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
12945     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
    if ((N00Bits == ExtVTBits ||
         (!IsZext && (N00Bits < ExtVTBits ||
                      DAG.ComputeMaxSignificantBits(N00, DemandedSrcElts) <=
                          ExtVTBits))) &&
12949         (!LegalOperations ||
12950          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
12951       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
12952   }
12953 
12954   // fold (sext_in_reg (zext x)) -> (sext x)
12955   // iff we are extending the source sign bit.
12956   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
12957     SDValue N00 = N0.getOperand(0);
12958     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
12959         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
12961   }
12962 
12963   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
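  // E.g. (illustrative) with ExtVT = i8, if bit 7 of x is known zero, sign
  // extending from bit 7 and zero extending agree, so use the zext form.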
12964   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
12965     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
12966 
12967   // fold operands of sext_in_reg based on knowledge that the top bits are not
12968   // demanded.
12969   if (SimplifyDemandedBits(SDValue(N, 0)))
12970     return SDValue(N, 0);
12971 
12972   // fold (sext_in_reg (load x)) -> (smaller sextload x)
12973   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
12974   if (SDValue NarrowLoad = reduceLoadWidth(N))
12975     return NarrowLoad;
12976 
12977   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
12978   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
12979   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
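  // E.g. (illustrative) with VT = i32 and ExtVT = i8: for ShAmt = 24,
  // (24 - 24) = 0 < InSignBits always holds, so we can form (sra X, 24);
  // ShAmt = 23 additionally requires X to have at least 2 sign bits.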
12980   if (N0.getOpcode() == ISD::SRL) {
12981     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
12982       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
12983         // We can turn this into an SRA iff the input to the SRL is already sign
12984         // extended enough.
12985         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
12986         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
12987           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
12988                              N0.getOperand(1));
12989       }
12990   }
12991 
12992   // fold (sext_inreg (extload x)) -> (sextload x)
12993   // If sextload is not supported by target, we can only do the combine when
12994   // load has one use. Doing otherwise can block folding the extload with other
12995   // extends that the target does support.
12996   if (ISD::isEXTLoad(N0.getNode()) &&
12997       ISD::isUNINDEXEDLoad(N0.getNode()) &&
12998       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
12999       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
13000         N0.hasOneUse()) ||
13001        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
13002     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13003     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
13004                                      LN0->getChain(),
13005                                      LN0->getBasePtr(), ExtVT,
13006                                      LN0->getMemOperand());
13007     CombineTo(N, ExtLoad);
13008     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13009     AddToWorklist(ExtLoad.getNode());
13010     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13011   }
13012 
13013   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
13014   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
13015       N0.hasOneUse() &&
13016       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
13017       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
13018        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
13019     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13020     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
13021                                      LN0->getChain(),
13022                                      LN0->getBasePtr(), ExtVT,
13023                                      LN0->getMemOperand());
13024     CombineTo(N, ExtLoad);
13025     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13026     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13027   }
13028 
13029   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
13030   // ignore it if the masked load is already sign extended
13031   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
13032     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
13033         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
13034         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
13035       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
13036           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
13037           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
13038           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
13039       CombineTo(N, ExtMaskedLoad);
13040       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
13041       return SDValue(N, 0); // Return N so it doesn't get rechecked!
13042     }
13043   }
13044 
13045   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
13046   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
13047     if (SDValue(GN0, 0).hasOneUse() &&
13048         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
13050       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
13051                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
13052 
13053       SDValue ExtLoad = DAG.getMaskedGather(
13054           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
13055           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
13056 
13057       CombineTo(N, ExtLoad);
13058       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
13059       AddToWorklist(ExtLoad.getNode());
13060       return SDValue(N, 0); // Return N so it doesn't get rechecked!
13061     }
13062   }
13063 
13064   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
13065   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
13066     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
13067                                            N0.getOperand(1), false))
13068       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
13069   }
13070 
13071   return SDValue();
13072 }
13073 
13074 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
13075   SDValue N0 = N->getOperand(0);
13076   EVT VT = N->getValueType(0);
13077 
13078   // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
13079   if (N0.isUndef())
13080     return DAG.getConstant(0, SDLoc(N), VT);
13081 
13082   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13083     return Res;
13084 
13085   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
13086     return SDValue(N, 0);
13087 
13088   return SDValue();
13089 }
13090 
13091 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
13092   SDValue N0 = N->getOperand(0);
13093   EVT VT = N->getValueType(0);
13094   EVT SrcVT = N0.getValueType();
13095   bool isLE = DAG.getDataLayout().isLittleEndian();
13096 
13097   // noop truncate
13098   if (SrcVT == VT)
13099     return N0;
13100 
13101   // fold (truncate (truncate x)) -> (truncate x)
13102   if (N0.getOpcode() == ISD::TRUNCATE)
13103     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13104 
13105   // fold (truncate c1) -> c1
13106   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
13107     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
13108     if (C.getNode() != N)
13109       return C;
13110   }
13111 
13112   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
13113   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
13114       N0.getOpcode() == ISD::SIGN_EXTEND ||
13115       N0.getOpcode() == ISD::ANY_EXTEND) {
13116     // if the source is smaller than the dest, we still need an extend.
13117     if (N0.getOperand(0).getValueType().bitsLT(VT))
13118       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
13120     if (N0.getOperand(0).getValueType().bitsGT(VT))
13121       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
13122     // if the source and dest are the same type, we can drop both the extend
13123     // and the truncate.
13124     return N0.getOperand(0);
13125   }
13126 
13127   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
13128   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
13129     return SDValue();
13130 
13131   // Fold extract-and-trunc into a narrow extract. For example:
13132   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
13133   //   i32 y = TRUNCATE(i64 x)
13134   //        -- becomes --
13135   //   v16i8 b = BITCAST (v2i64 val)
13136   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
13137   //
13138   // Note: We only run this optimization after type legalization (which often
13139   // creates this pattern) and before operation legalization after which
13140   // we need to be more careful about the vector instructions that we generate.
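  // On big-endian targets the low (kept) bits of each wide element live in
  // the highest-numbered narrow sub-element, hence the endian-dependent
  // index computation below.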
13141   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
13142       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
13143     EVT VecTy = N0.getOperand(0).getValueType();
13144     EVT ExTy = N0.getValueType();
13145     EVT TrTy = N->getValueType(0);
13146 
13147     auto EltCnt = VecTy.getVectorElementCount();
13148     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
13149     auto NewEltCnt = EltCnt * SizeRatio;
13150 
13151     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
13152     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
13153 
13154     SDValue EltNo = N0->getOperand(1);
13155     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
13156       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
13157       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
13158 
13159       SDLoc DL(N);
13160       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
13161                          DAG.getBitcast(NVT, N0.getOperand(0)),
13162                          DAG.getVectorIdxConstant(Index, DL));
13163     }
13164   }
13165 
13166   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
13167   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
13168     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
13169         TLI.isTruncateFree(SrcVT, VT)) {
13170       SDLoc SL(N0);
13171       SDValue Cond = N0.getOperand(0);
13172       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
13173       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
13174       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
13175     }
13176   }
13177 
  // trunc (shl x, K) -> shl (trunc x), K, if K < VT.getScalarSizeInBits()
13179   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
13180       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
13181       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
13182     SDValue Amt = N0.getOperand(1);
13183     KnownBits Known = DAG.computeKnownBits(Amt);
13184     unsigned Size = VT.getScalarSizeInBits();
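    // The shift can be performed after the truncate only if the amount is
    // provably less than the narrow bit width, e.g. (illustrative)
    // (i32 (trunc (shl i64:x, 4))) -> (shl (i32 (trunc x)), 4).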
13185     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
13186       SDLoc SL(N);
13187       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
13188 
13189       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
13190       if (AmtVT != Amt.getValueType()) {
13191         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
13192         AddToWorklist(Amt.getNode());
13193       }
13194       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
13195     }
13196   }
13197 
13198   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
13199     return V;
13200 
13201   // Attempt to pre-truncate BUILD_VECTOR sources.
13202   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
13203       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
13204       // Avoid creating illegal types if running after type legalizer.
13205       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
13206     SDLoc DL(N);
13207     EVT SVT = VT.getScalarType();
13208     SmallVector<SDValue, 8> TruncOps;
13209     for (const SDValue &Op : N0->op_values()) {
13210       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
13211       TruncOps.push_back(TruncOp);
13212     }
13213     return DAG.getBuildVector(VT, DL, TruncOps);
13214   }
13215 
13216   // Fold a series of buildvector, bitcast, and truncate if possible.
13217   // For example fold
13218   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
13219   //   (2xi32 (buildvector x, y)).
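  // With four source elements and two result elements, the stride below is
  // two, so operands 0 and 2 (x and y in the example above) are kept.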
13220   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
13221       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
13222       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
13223       N0.getOperand(0).hasOneUse()) {
13224     SDValue BuildVect = N0.getOperand(0);
13225     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
13226     EVT TruncVecEltTy = VT.getVectorElementType();
13227 
13228     // Check that the element types match.
13229     if (BuildVectEltTy == TruncVecEltTy) {
13230       // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts = BuildVect.getNumOperands();
13232       unsigned TruncVecNumElts = VT.getVectorNumElements();
13233       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
13234 
13235       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
13236              "Invalid number of elements");
13237 
13238       SmallVector<SDValue, 8> Opnds;
13239       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
13240         Opnds.push_back(BuildVect.getOperand(i));
13241 
13242       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13243     }
13244   }
13245 
13246   // See if we can simplify the input to this truncate through knowledge that
13247   // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> "trunc y" when truncating to i8.
13249   // Currently we only perform this optimization on scalars because vectors
13250   // may have different active low bits.
13251   if (!VT.isVector()) {
13252     APInt Mask =
13253         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
13254     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
13255       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
13256   }
13257 
13258   // fold (truncate (load x)) -> (smaller load x)
13259   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
13260   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
13261     if (SDValue Reduced = reduceLoadWidth(N))
13262       return Reduced;
13263 
13264     // Handle the case where the load remains an extending load even
13265     // after truncation.
13266     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
13267       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13268       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
13269         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
13270                                          VT, LN0->getChain(), LN0->getBasePtr(),
13271                                          LN0->getMemoryVT(),
13272                                          LN0->getMemOperand());
13273         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
13274         return NewLoad;
13275       }
13276     }
13277   }
13278 
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
  // where ... are all 'undef'.
13281   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
13282     SmallVector<EVT, 8> VTs;
13283     SDValue V;
13284     unsigned Idx = 0;
13285     unsigned NumDefs = 0;
13286 
13287     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
13288       SDValue X = N0.getOperand(i);
13289       if (!X.isUndef()) {
13290         V = X;
13291         Idx = i;
13292         NumDefs++;
13293       }
      // Stop if more than one member is non-undef.
13295       if (NumDefs > 1)
13296         break;
13297 
13298       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
13299                                      VT.getVectorElementType(),
13300                                      X.getValueType().getVectorElementCount()));
13301     }
13302 
13303     if (NumDefs == 0)
13304       return DAG.getUNDEF(VT);
13305 
13306     if (NumDefs == 1) {
13307       assert(V.getNode() && "The single defined operand is empty!");
13308       SmallVector<SDValue, 8> Opnds;
13309       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
13310         if (i != Idx) {
13311           Opnds.push_back(DAG.getUNDEF(VTs[i]));
13312           continue;
13313         }
13314         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
13315         AddToWorklist(NV.getNode());
13316         Opnds.push_back(NV);
13317       }
13318       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
13319     }
13320   }
13321 
13322   // Fold truncate of a bitcast of a vector to an extract of the low vector
13323   // element.
13324   //
13325   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
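  // The scalar's low bits live in element 0 on little-endian targets and in
  // the last element on big-endian targets, hence the index choice below.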
13326   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
13327     SDValue VecSrc = N0.getOperand(0);
13328     EVT VecSrcVT = VecSrc.getValueType();
13329     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
13330         (!LegalOperations ||
13331          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
13332       SDLoc SL(N);
13333 
13334       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
13335       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
13336                          DAG.getVectorIdxConstant(Idx, SL));
13337     }
13338   }
13339 
13340   // Simplify the operands using demanded-bits information.
13341   if (SimplifyDemandedBits(SDValue(N, 0)))
13342     return SDValue(N, 0);
13343 
13344   // fold (truncate (extract_subvector(ext x))) ->
13345   //      (extract_subvector x)
13346   // TODO: This can be generalized to cover cases where the truncate and extract
13347   // do not fully cancel each other out.
13348   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
13349     SDValue N00 = N0.getOperand(0);
13350     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
13351         N00.getOpcode() == ISD::ZERO_EXTEND ||
13352         N00.getOpcode() == ISD::ANY_EXTEND) {
13353       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
13354           VT.getVectorElementType())
13355         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
13356                            N00.getOperand(0), N0.getOperand(1));
13357     }
13358   }
13359 
13360   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13361     return NewVSel;
13362 
13363   // Narrow a suitable binary operation with a non-opaque constant operand by
13364   // moving it ahead of the truncate. This is limited to pre-legalization
13365   // because targets may prefer a wider type during later combines and invert
13366   // this transform.
13367   switch (N0.getOpcode()) {
13368   case ISD::ADD:
13369   case ISD::SUB:
13370   case ISD::MUL:
13371   case ISD::AND:
13372   case ISD::OR:
13373   case ISD::XOR:
13374     if (!LegalOperations && N0.hasOneUse() &&
13375         (isConstantOrConstantVector(N0.getOperand(0), true) ||
13376          isConstantOrConstantVector(N0.getOperand(1), true))) {
13377       // TODO: We already restricted this to pre-legalization, but for vectors
13378       // we are extra cautious to not create an unsupported operation.
13379       // Target-specific changes are likely needed to avoid regressions here.
13380       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
13381         SDLoc DL(N);
13382         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
13383         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
13384         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
13385       }
13386     }
13387     break;
13388   case ISD::ADDE:
13389   case ISD::ADDCARRY:
13390     // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
13391     // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
13392     // When the adde's carry is not used.
    // We only do this for ADDCARRY before operation legalization.
13394     if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
13395          TLI.isOperationLegal(N0.getOpcode(), VT)) &&
13396         N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
13397       SDLoc DL(N);
13398       SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
13399       SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
13400       SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
13401       return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
13402     }
13403     break;
13404   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must also ensure that
    // we don't introduce an extra truncate.
13408     if (!LegalOperations && N0.hasOneUse() &&
13409         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13410         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
13411             VT.getScalarSizeInBits() &&
13412         hasOperation(N0.getOpcode(), VT)) {
13413       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
13414                                  DAG, SDLoc(N));
13415     }
13416     break;
13417   }
13418 
13419   return SDValue();
13420 }
13421 
13422 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
13423   SDValue Elt = N->getOperand(i);
13424   if (Elt.getOpcode() != ISD::MERGE_VALUES)
13425     return Elt.getNode();
13426   return Elt.getOperand(Elt.getResNo()).getNode();
13427 }
13428 
13429 /// build_pair (load, load) -> load
13430 /// if load locations are consecutive.
13431 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
13432   assert(N->getOpcode() == ISD::BUILD_PAIR);
13433 
13434   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
13435   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
13436 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
13440   if (DAG.getDataLayout().isBigEndian())
13441     std::swap(LD1, LD2);
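  // After this swap, LD1 is expected to be the load at the lower address and
  // LD2 the load exactly LD1's store size above it (checked below).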
13442 
13443   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
13444       !LD1->hasOneUse() || !LD2->hasOneUse() ||
13445       LD1->getAddressSpace() != LD2->getAddressSpace())
13446     return SDValue();
13447 
13448   bool LD1Fast = false;
13449   EVT LD1VT = LD1->getValueType(0);
13450   unsigned LD1Bytes = LD1VT.getStoreSize();
13451   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
13452       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
13453       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
13454                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
13455     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
13456                        LD1->getPointerInfo(), LD1->getAlign());
13457 
13458   return SDValue();
13459 }
13460 
13461 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
13462   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
13463   // and Lo parts; on big-endian machines it doesn't.
13464   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
13465 }
13466 
13467 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
13468                                     const TargetLowering &TLI) {
13469   // If this is not a bitcast to an FP type or if the target doesn't have
13470   // IEEE754-compliant FP logic, we're done.
13471   EVT VT = N->getValueType(0);
13472   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
13473     return SDValue();
13474 
13475   // TODO: Handle cases where the integer constant is a different scalar
13476   // bitwidth to the FP.
13477   SDValue N0 = N->getOperand(0);
13478   EVT SourceVT = N0.getValueType();
13479   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
13480     return SDValue();
13481 
13482   unsigned FPOpcode;
13483   APInt SignMask;
13484   switch (N0.getOpcode()) {
13485   case ISD::AND:
13486     FPOpcode = ISD::FABS;
13487     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
13488     break;
13489   case ISD::XOR:
13490     FPOpcode = ISD::FNEG;
13491     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13492     break;
13493   case ISD::OR:
13494     FPOpcode = ISD::FABS;
13495     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
13496     break;
13497   default:
13498     return SDValue();
13499   }
13500 
13501   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
13502   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
13503   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
13504   //   fneg (fabs X)
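  // E.g. (illustrative) for f32 the masks are 0x7fffffff (FABS via AND) and
  // 0x80000000 (FNEG via XOR, FNEG(FABS) via OR).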
13505   SDValue LogicOp0 = N0.getOperand(0);
13506   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
13507   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
13508       LogicOp0.getOpcode() == ISD::BITCAST &&
13509       LogicOp0.getOperand(0).getValueType() == VT) {
13510     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
13511     NumFPLogicOpsConv++;
13512     if (N0.getOpcode() == ISD::OR)
13513       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
13514     return FPOp;
13515   }
13516 
13517   return SDValue();
13518 }
13519 
13520 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
13521   SDValue N0 = N->getOperand(0);
13522   EVT VT = N->getValueType(0);
13523 
13524   if (N0.isUndef())
13525     return DAG.getUNDEF(VT);
13526 
13527   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
13528   // Only do this before legalize types, unless both types are integer and the
13529   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
13531   // First check to see if this is all constant.
13532   // TODO: Support FP bitcasts after legalize types.
13533   if (VT.isVector() &&
13534       (!LegalTypes ||
13535        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
13536         TLI.isTypeLegal(VT.getVectorElementType()))) &&
13537       N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
13538       cast<BuildVectorSDNode>(N0)->isConstant())
13539     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
13540                                              VT.getVectorElementType());
13541 
13542   // If the input is a constant, let getNode fold it.
13543   if (isIntOrFPConstant(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
13547     if (!LegalOperations ||
13548         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
13549          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
13550         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
13551          TLI.isOperationLegal(ISD::Constant, VT))) {
13552       SDValue C = DAG.getBitcast(VT, N0);
13553       if (C.getNode() != N)
13554         return C;
13555     }
13556   }
13557 
13558   // (conv (conv x, t1), t2) -> (conv x, t2)
13559   if (N0.getOpcode() == ISD::BITCAST)
13560     return DAG.getBitcast(VT, N0.getOperand(0));
13561 
13562   // fold (conv (load x)) -> (load (conv*)x)
13563   // If the resultant load doesn't need a higher alignment than the original!
13564   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13565       // Do not remove the cast if the types differ in endian layout.
13566       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
13567           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
13568       // If the load is volatile, we only want to change the load type if the
13569       // resulting load is legal. Otherwise we might increase the number of
13570       // memory accesses. We don't care if the original type was legal or not
13571       // as we assume software couldn't rely on the number of accesses of an
13572       // illegal type.
13573       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
13574        TLI.isOperationLegal(ISD::LOAD, VT))) {
13575     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13576 
13577     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
13578                                     *LN0->getMemOperand())) {
13579       SDValue Load =
13580           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
13581                       LN0->getPointerInfo(), LN0->getAlign(),
13582                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
13583       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
13584       return Load;
13585     }
13586   }
13587 
13588   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
13589     return V;
13590 
13591   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
13592   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
13593   //
13594   // For ppc_fp128:
13595   // fold (bitcast (fneg x)) ->
13596   //     flipbit = signbit
13597   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13598   //
13599   // fold (bitcast (fabs x)) ->
13600   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
13601   //     (xor (bitcast x) (build_pair flipbit, flipbit))
13602   // This often reduces constant pool loads.
13603   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
13604        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
13605       N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
13606       !N0.getValueType().isVector()) {
13607     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
13608     AddToWorklist(NewConv.getNode());
13609 
13610     SDLoc DL(N);
13611     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13612       assert(VT.getSizeInBits() == 128);
13613       SDValue SignBit = DAG.getConstant(
13614           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
13615       SDValue FlipBit;
13616       if (N0.getOpcode() == ISD::FNEG) {
13617         FlipBit = SignBit;
13618         AddToWorklist(FlipBit.getNode());
13619       } else {
13620         assert(N0.getOpcode() == ISD::FABS);
13621         SDValue Hi =
13622             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
13623                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13624                                               SDLoc(NewConv)));
13625         AddToWorklist(Hi.getNode());
13626         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
13627         AddToWorklist(FlipBit.getNode());
13628       }
13629       SDValue FlipBits =
13630           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13631       AddToWorklist(FlipBits.getNode());
13632       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
13633     }
13634     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13635     if (N0.getOpcode() == ISD::FNEG)
13636       return DAG.getNode(ISD::XOR, DL, VT,
13637                          NewConv, DAG.getConstant(SignBit, DL, VT));
13638     assert(N0.getOpcode() == ISD::FABS);
13639     return DAG.getNode(ISD::AND, DL, VT,
13640                        NewConv, DAG.getConstant(~SignBit, DL, VT));
13641   }
13642 
13643   // fold (bitconvert (fcopysign cst, x)) ->
13644   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
13645   // Note that we don't handle (copysign x, cst) because this can always be
13646   // folded to an fneg or fabs.
13647   //
13648   // For ppc_fp128:
13649   // fold (bitcast (fcopysign cst, x)) ->
13650   //     flipbit = (and (extract_element
13651   //                     (xor (bitcast cst), (bitcast x)), 0),
13652   //                    signbit)
13653   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
13654   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
13655       isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
13656       !VT.isVector()) {
13657     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
13658     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
13659     if (isTypeLegal(IntXVT)) {
13660       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
13661       AddToWorklist(X.getNode());
13662 
13663       // If X has a different width than the result/lhs, sext it or truncate it.
13664       unsigned VTWidth = VT.getSizeInBits();
13665       if (OrigXWidth < VTWidth) {
13666         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
13667         AddToWorklist(X.getNode());
13668       } else if (OrigXWidth > VTWidth) {
13669         // To get the sign bit in the right place, we have to shift it right
13670         // before truncating.
13671         SDLoc DL(X);
13672         X = DAG.getNode(ISD::SRL, DL,
13673                         X.getValueType(), X,
13674                         DAG.getConstant(OrigXWidth-VTWidth, DL,
13675                                         X.getValueType()));
13676         AddToWorklist(X.getNode());
13677         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
13678         AddToWorklist(X.getNode());
13679       }
13680 
13681       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
13682         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
13683         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13684         AddToWorklist(Cst.getNode());
13685         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
13686         AddToWorklist(X.getNode());
13687         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
13688         AddToWorklist(XorResult.getNode());
13689         SDValue XorResult64 = DAG.getNode(
13690             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
13691             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
13692                                   SDLoc(XorResult)));
13693         AddToWorklist(XorResult64.getNode());
13694         SDValue FlipBit =
13695             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
13696                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
13697         AddToWorklist(FlipBit.getNode());
13698         SDValue FlipBits =
13699             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
13700         AddToWorklist(FlipBits.getNode());
13701         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
13702       }
13703       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
13704       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
13705                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
13706       AddToWorklist(X.getNode());
13707 
13708       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
13709       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
13710                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
13711       AddToWorklist(Cst.getNode());
13712 
13713       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
13714     }
13715   }
13716 
13717   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
13718   if (N0.getOpcode() == ISD::BUILD_PAIR)
13719     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
13720       return CombineLD;
13721 
13722   // Remove double bitcasts from shuffles - this is often a legacy of
13723   // XformToShuffleWithZero being used to combine bitmaskings (of
13724   // float vectors bitcast to integer vectors) into shuffles.
13725   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
13726   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
13727       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
13728       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
13729       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
13730     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
13731 
    // If an operand is a bitcast from the original VT, peek through it.
    // If an operand is a constant, just bitcast it back to the original VT.
13734     auto PeekThroughBitcast = [&](SDValue Op) {
13735       if (Op.getOpcode() == ISD::BITCAST &&
13736           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
13738       if (Op.isUndef() || isAnyConstantBuildVector(Op))
13739         return DAG.getBitcast(VT, Op);
13740       return SDValue();
13741     };
13742 
13743     // FIXME: If either input vector is bitcast, try to convert the shuffle to
13744     // the result type of this bitcast. This would eliminate at least one
13745     // bitcast. See the transform in InstCombine.
13746     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
13747     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
13748     if (!(SV0 && SV1))
13749       return SDValue();
13750 
13751     int MaskScale =
13752         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
13753     SmallVector<int, 8> NewMask;
13754     for (int M : SVN->getMask())
13755       for (int i = 0; i != MaskScale; ++i)
13756         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
13757 
13758     SDValue LegalShuffle =
13759         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
13760     if (LegalShuffle)
13761       return LegalShuffle;
13762   }
13763 
13764   return SDValue();
13765 }
13766 
13767 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
13768   EVT VT = N->getValueType(0);
13769   return CombineConsecutiveLoads(N, VT);
13770 }
13771 
13772 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
13773   SDValue N0 = N->getOperand(0);
13774 
13775   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
13776     return N0;
13777 
13778   // Fold freeze(bitcast(x)) -> bitcast(freeze(x)).
13779   // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold.
13780   if (N0.getOpcode() == ISD::BITCAST)
13781     return DAG.getBitcast(N->getValueType(0),
13782                           DAG.getNode(ISD::FREEZE, SDLoc(N0),
13783                                       N0.getOperand(0).getValueType(),
13784                                       N0.getOperand(0)));
13785 
13786   return SDValue();
13787 }
13788 
13789 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
13790 /// operands. DstEltVT indicates the destination element value type.
13791 SDValue DAGCombiner::
13792 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
13793   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
13794 
13795   // If this is already the right type, we're done.
13796   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
13797 
13798   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
13799   unsigned DstBitSize = DstEltVT.getSizeInBits();
13800 
13801   // If this is a conversion of N elements of one type to N elements of another
13802   // type, convert each element.  This handles FP<->INT cases.
13803   if (SrcBitSize == DstBitSize) {
13804     SmallVector<SDValue, 8> Ops;
13805     for (SDValue Op : BV->op_values()) {
13806       // If the vector element type is not legal, the BUILD_VECTOR operands
13807       // are promoted and implicitly truncated.  Make that explicit here.
13808       if (Op.getValueType() != SrcEltVT)
13809         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
13810       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
13811       AddToWorklist(Ops.back().getNode());
13812     }
13813     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
13814                               BV->getValueType(0).getVectorNumElements());
13815     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
13816   }
13817 
13818   // Otherwise, we're growing or shrinking the elements.  To avoid having to
13819   // handle annoying details of growing/shrinking FP values, we convert them to
13820   // int first.
13821   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector where the elements are
    // the same size.
13824     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
13825     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
13826     SrcEltVT = IntVT;
13827   }
13828 
13829   // Now we know the input is an integer vector.  If the output is a FP type,
13830   // convert to integer first, then to FP of the right size.
13831   if (DstEltVT.isFloatingPoint()) {
13832     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
13833     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
13834 
13835     // Next, convert to FP elements of the same size.
13836     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
13837   }
13838 
13839   // Okay, we know the src/dst types are both integers of differing types.
13840   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
13841 
13842   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
13843   // BuildVectorSDNode?
13844   auto *BVN = cast<BuildVectorSDNode>(BV);
13845 
13846   // Extract the constant raw bit data.
13847   BitVector UndefElements;
13848   SmallVector<APInt> RawBits;
13849   bool IsLE = DAG.getDataLayout().isLittleEndian();
13850   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
13851     return SDValue();
13852 
13853   SDLoc DL(BV);
13854   SmallVector<SDValue, 8> Ops;
13855   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
13856     if (UndefElements[I])
13857       Ops.push_back(DAG.getUNDEF(DstEltVT));
13858     else
13859       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
13860   }
13861 
13862   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
13863   return DAG.getBuildVector(VT, DL, Ops);
13864 }
13865 
// Returns true if floating-point contraction is allowed on the FMUL-SDValue
// `N`.
13868 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
13869   assert(N.getOpcode() == ISD::FMUL);
13870 
13871   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
13872          N->getFlags().hasAllowContract();
13873 }
13874 
// Returns true if `N` can be assumed to involve no infinities in its
// computation.
13876 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
13877   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
13878 }
13879 
13880 /// Try to perform FMA combining on a given FADD node.
13881 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
13882   SDValue N0 = N->getOperand(0);
13883   SDValue N1 = N->getOperand(1);
13884   EVT VT = N->getValueType(0);
13885   SDLoc SL(N);
13886 
13887   const TargetOptions &Options = DAG.getTarget().Options;
13888 
13889   // Floating-point multiply-add with intermediate rounding.
13890   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13891 
13892   // Floating-point multiply-add without intermediate rounding.
13893   bool HasFMA =
13894       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13895       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13896 
13897   // No valid opcode, do not combine.
13898   if (!HasFMAD && !HasFMA)
13899     return SDValue();
13900 
13901   bool CanReassociate =
13902       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13903   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13904                               Options.UnsafeFPMath || HasFMAD);
13905   // If the addition is not contractable, do not combine.
13906   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
13907     return SDValue();
13908 
13909   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13910     return SDValue();
13911 
13912   // Always prefer FMAD to FMA for precision.
13913   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13914   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13915 
13916   auto isFusedOp = [&](SDValue N) {
13917     unsigned Opcode = N.getOpcode();
13918     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
13919   };
13920 
13921   // Is the node an FMUL and contractable either due to global flags or
13922   // SDNodeFlags.
13923   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13924     if (N.getOpcode() != ISD::FMUL)
13925       return false;
13926     return AllowFusionGlobally || N->getFlags().hasAllowContract();
13927   };
13928   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
13929   // prefer to fold the multiply with fewer uses.
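  // Folding the multiply with fewer uses makes it more likely that the
  // original FMUL becomes dead and can be removed.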
13930   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
13931     if (N0->use_size() > N1->use_size())
13932       std::swap(N0, N1);
13933   }
13934 
13935   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
13936   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
13937     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13938                        N0.getOperand(1), N1);
13939   }
13940 
13941   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
13942   // Note: Commutes FADD operands.
13943   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
13944     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
13945                        N1.getOperand(1), N0);
13946   }
13947 
13948   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
13949   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
13950   // This requires reassociation because it changes the order of operations.
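  // E.g. this rewrites (A*B + C*D) + E as A*B + (C*D + E), so reassociation
  // (or global unsafe-fp-math) must be allowed.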
13951   SDValue FMA, E;
13952   if (CanReassociate && isFusedOp(N0) &&
13953       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
13954       N0.getOperand(2).hasOneUse()) {
13955     FMA = N0;
13956     E = N1;
13957   } else if (CanReassociate && isFusedOp(N1) &&
13958              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
13959              N1.getOperand(2).hasOneUse()) {
13960     FMA = N1;
13961     E = N0;
13962   }
13963   if (FMA && E) {
13964     SDValue A = FMA.getOperand(0);
13965     SDValue B = FMA.getOperand(1);
13966     SDValue C = FMA.getOperand(2).getOperand(0);
13967     SDValue D = FMA.getOperand(2).getOperand(1);
13968     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
13969     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
13970   }
13971 
13972   // Look through FP_EXTEND nodes to do more combining.
13973 
13974   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
13975   if (N0.getOpcode() == ISD::FP_EXTEND) {
13976     SDValue N00 = N0.getOperand(0);
13977     if (isContractableFMUL(N00) &&
13978         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13979                             N00.getValueType())) {
13980       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13981                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13982                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13983                          N1);
13984     }
13985   }
13986 
13987   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
13988   // Note: Commutes FADD operands.
13989   if (N1.getOpcode() == ISD::FP_EXTEND) {
13990     SDValue N10 = N1.getOperand(0);
13991     if (isContractableFMUL(N10) &&
13992         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13993                             N10.getValueType())) {
13994       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13995                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
13996                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
13997                          N0);
13998     }
13999   }
14000 
14001   // More folding opportunities when target permits.
14002   if (Aggressive) {
14003     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
14004     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
14005     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14006                                     SDValue Z) {
14007       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
14008                          DAG.getNode(PreferredFusedOpcode, SL, VT,
14009                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14010                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
14011                                      Z));
14012     };
14013     if (isFusedOp(N0)) {
14014       SDValue N02 = N0.getOperand(2);
14015       if (N02.getOpcode() == ISD::FP_EXTEND) {
14016         SDValue N020 = N02.getOperand(0);
14017         if (isContractableFMUL(N020) &&
14018             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14019                                 N020.getValueType())) {
14020           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
14021                                       N020.getOperand(0), N020.getOperand(1),
14022                                       N1);
14023         }
14024       }
14025     }
14026 
14027     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
14028     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
14029     // FIXME: This turns two single-precision and one double-precision
14030     // operation into two double-precision operations, which might not be
14031     // interesting for all targets, especially GPUs.
14032     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
14033                                     SDValue Z) {
14034       return DAG.getNode(
14035           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
14036           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
14037           DAG.getNode(PreferredFusedOpcode, SL, VT,
14038                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
14039                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
14040     };
14041     if (N0.getOpcode() == ISD::FP_EXTEND) {
14042       SDValue N00 = N0.getOperand(0);
14043       if (isFusedOp(N00)) {
14044         SDValue N002 = N00.getOperand(2);
14045         if (isContractableFMUL(N002) &&
14046             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14047                                 N00.getValueType())) {
14048           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
14049                                       N002.getOperand(0), N002.getOperand(1),
14050                                       N1);
14051         }
14052       }
14053     }
14054 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
14056     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
14057     if (isFusedOp(N1)) {
14058       SDValue N12 = N1.getOperand(2);
14059       if (N12.getOpcode() == ISD::FP_EXTEND) {
14060         SDValue N120 = N12.getOperand(0);
14061         if (isContractableFMUL(N120) &&
14062             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14063                                 N120.getValueType())) {
14064           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
14065                                       N120.getOperand(0), N120.getOperand(1),
14066                                       N0);
14067         }
14068       }
14069     }
14070 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
14072     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
14073     // FIXME: This turns two single-precision and one double-precision
14074     // operation into two double-precision operations, which might not be
14075     // interesting for all targets, especially GPUs.
14076     if (N1.getOpcode() == ISD::FP_EXTEND) {
14077       SDValue N10 = N1.getOperand(0);
14078       if (isFusedOp(N10)) {
14079         SDValue N102 = N10.getOperand(2);
14080         if (isContractableFMUL(N102) &&
14081             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14082                                 N10.getValueType())) {
14083           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
14084                                       N102.getOperand(0), N102.getOperand(1),
14085                                       N0);
14086         }
14087       }
14088     }
14089   }
14090 
14091   return SDValue();
14092 }
14093 
14094 /// Try to perform FMA combining on a given FSUB node.
14095 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
14096   SDValue N0 = N->getOperand(0);
14097   SDValue N1 = N->getOperand(1);
14098   EVT VT = N->getValueType(0);
14099   SDLoc SL(N);
14100 
14101   const TargetOptions &Options = DAG.getTarget().Options;
14102   // Floating-point multiply-add with intermediate rounding.
14103   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
14104 
14105   // Floating-point multiply-add without intermediate rounding.
14106   bool HasFMA =
14107       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14108       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14109 
14110   // No valid opcode, do not combine.
14111   if (!HasFMAD && !HasFMA)
14112     return SDValue();
14113 
14114   const SDNodeFlags Flags = N->getFlags();
14115   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
14116                               Options.UnsafeFPMath || HasFMAD);
14117 
14118   // If the subtraction is not contractable, do not combine.
14119   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
14120     return SDValue();
14121 
14122   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
14123     return SDValue();
14124 
14125   // Always prefer FMAD to FMA for precision.
14126   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14127   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14128   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
14129 
14130   // Is the node an FMUL and contractable either due to global flags or
14131   // SDNodeFlags.
14132   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
14133     if (N.getOpcode() != ISD::FMUL)
14134       return false;
14135     return AllowFusionGlobally || N->getFlags().hasAllowContract();
14136   };
14137 
14138   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14139   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
14140     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
14141       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
14142                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
14143     }
14144     return SDValue();
14145   };
14146 
14147   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14148   // Note: Commutes FSUB operands.
14149   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
14150     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
14151       return DAG.getNode(PreferredFusedOpcode, SL, VT,
14152                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
14153                          YZ.getOperand(1), X);
14154     }
14155     return SDValue();
14156   };
14157 
14158   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
14159   // prefer to fold the multiply with fewer uses.
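  // (A multiply with other users must be computed anyway, so contracting the
  // multiply with fewer uses gives the best chance of deleting an FMUL.)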
14160   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
14161       (N0->use_size() > N1->use_size())) {
14162     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
14163     if (SDValue V = tryToFoldXSubYZ(N0, N1))
14164       return V;
14165     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
14166     if (SDValue V = tryToFoldXYSubZ(N0, N1))
14167       return V;
14168   } else {
14169     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
14170     if (SDValue V = tryToFoldXYSubZ(N0, N1))
14171       return V;
14172     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
14173     if (SDValue V = tryToFoldXSubYZ(N0, N1))
14174       return V;
14175   }
14176 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
14178   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
14179       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
14180     SDValue N00 = N0.getOperand(0).getOperand(0);
14181     SDValue N01 = N0.getOperand(0).getOperand(1);
14182     return DAG.getNode(PreferredFusedOpcode, SL, VT,
14183                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
14184                        DAG.getNode(ISD::FNEG, SL, VT, N1));
14185   }
14186 
14187   // Look through FP_EXTEND nodes to do more combining.
14188 
14189   // fold (fsub (fpext (fmul x, y)), z)
14190   //   -> (fma (fpext x), (fpext y), (fneg z))
14191   if (N0.getOpcode() == ISD::FP_EXTEND) {
14192     SDValue N00 = N0.getOperand(0);
14193     if (isContractableFMUL(N00) &&
14194         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14195                             N00.getValueType())) {
14196       return DAG.getNode(PreferredFusedOpcode, SL, VT,
14197                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14198                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14199                          DAG.getNode(ISD::FNEG, SL, VT, N1));
14200     }
14201   }
14202 
14203   // fold (fsub x, (fpext (fmul y, z)))
14204   //   -> (fma (fneg (fpext y)), (fpext z), x)
14205   // Note: Commutes FSUB operands.
14206   if (N1.getOpcode() == ISD::FP_EXTEND) {
14207     SDValue N10 = N1.getOperand(0);
14208     if (isContractableFMUL(N10) &&
14209         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14210                             N10.getValueType())) {
14211       return DAG.getNode(
14212           PreferredFusedOpcode, SL, VT,
14213           DAG.getNode(ISD::FNEG, SL, VT,
14214                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
14215           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
14216     }
14217   }
14218 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
14225   if (N0.getOpcode() == ISD::FP_EXTEND) {
14226     SDValue N00 = N0.getOperand(0);
14227     if (N00.getOpcode() == ISD::FNEG) {
14228       SDValue N000 = N00.getOperand(0);
14229       if (isContractableFMUL(N000) &&
14230           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14231                               N00.getValueType())) {
14232         return DAG.getNode(
14233             ISD::FNEG, SL, VT,
14234             DAG.getNode(PreferredFusedOpcode, SL, VT,
14235                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14236                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14237                         N1));
14238       }
14239     }
14240   }
14241 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
14248   if (N0.getOpcode() == ISD::FNEG) {
14249     SDValue N00 = N0.getOperand(0);
14250     if (N00.getOpcode() == ISD::FP_EXTEND) {
14251       SDValue N000 = N00.getOperand(0);
14252       if (isContractableFMUL(N000) &&
14253           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14254                               N000.getValueType())) {
14255         return DAG.getNode(
14256             ISD::FNEG, SL, VT,
14257             DAG.getNode(PreferredFusedOpcode, SL, VT,
14258                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
14259                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
14260                         N1));
14261       }
14262     }
14263   }
14264 
14265   auto isReassociable = [Options](SDNode *N) {
14266     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14267   };
14268 
14269   auto isContractableAndReassociableFMUL = [isContractableFMUL,
14270                                             isReassociable](SDValue N) {
14271     return isContractableFMUL(N) && isReassociable(N.getNode());
14272   };
14273 
14274   auto isFusedOp = [&](SDValue N) {
14275     unsigned Opcode = N.getOpcode();
14276     return Opcode == ISD::FMA || Opcode == ISD::FMAD;
14277   };
14278 
14279   // More folding opportunities when target permits.
  // More folding opportunities when the target permits.
14281     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
14282     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
14284     if (CanFuse && isFusedOp(N0) &&
14285         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
14286         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
14287       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
14288                          N0.getOperand(1),
14289                          DAG.getNode(PreferredFusedOpcode, SL, VT,
14290                                      N0.getOperand(2).getOperand(0),
14291                                      N0.getOperand(2).getOperand(1),
14292                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
14293     }
14294 
14295     // fold (fsub x, (fma y, z, (fmul u, v)))
14296     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
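    // This reassociates the subtraction through the FMA chain, which can
    // change the sign of an exactly-zero result; hence the nsz requirement.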
14297     if (CanFuse && isFusedOp(N1) &&
14298         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
14299         N1->hasOneUse() && NoSignedZero) {
14300       SDValue N20 = N1.getOperand(2).getOperand(0);
14301       SDValue N21 = N1.getOperand(2).getOperand(1);
14302       return DAG.getNode(
14303           PreferredFusedOpcode, SL, VT,
14304           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14305           DAG.getNode(PreferredFusedOpcode, SL, VT,
14306                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
14307     }
14308 
14309     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
14311     if (isFusedOp(N0) && N0->hasOneUse()) {
14312       SDValue N02 = N0.getOperand(2);
14313       if (N02.getOpcode() == ISD::FP_EXTEND) {
14314         SDValue N020 = N02.getOperand(0);
14315         if (isContractableAndReassociableFMUL(N020) &&
14316             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14317                                 N020.getValueType())) {
14318           return DAG.getNode(
14319               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
14320               DAG.getNode(
14321                   PreferredFusedOpcode, SL, VT,
14322                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
14323                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
14324                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
14325         }
14326       }
14327     }
14328 
14329     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
14330     //   -> (fma (fpext x), (fpext y),
14331     //           (fma (fpext u), (fpext v), (fneg z)))
14332     // FIXME: This turns two single-precision and one double-precision
14333     // operation into two double-precision operations, which might not be
14334     // interesting for all targets, especially GPUs.
14335     if (N0.getOpcode() == ISD::FP_EXTEND) {
14336       SDValue N00 = N0.getOperand(0);
14337       if (isFusedOp(N00)) {
14338         SDValue N002 = N00.getOperand(2);
14339         if (isContractableAndReassociableFMUL(N002) &&
14340             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14341                                 N00.getValueType())) {
14342           return DAG.getNode(
14343               PreferredFusedOpcode, SL, VT,
14344               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
14345               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
14346               DAG.getNode(
14347                   PreferredFusedOpcode, SL, VT,
14348                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
14349                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
14350                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
14351         }
14352       }
14353     }
14354 
14355     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
14356     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
14357     if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
14358         N1->hasOneUse()) {
14359       SDValue N120 = N1.getOperand(2).getOperand(0);
14360       if (isContractableAndReassociableFMUL(N120) &&
14361           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14362                               N120.getValueType())) {
14363         SDValue N1200 = N120.getOperand(0);
14364         SDValue N1201 = N120.getOperand(1);
14365         return DAG.getNode(
14366             PreferredFusedOpcode, SL, VT,
14367             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
14368             DAG.getNode(PreferredFusedOpcode, SL, VT,
14369                         DAG.getNode(ISD::FNEG, SL, VT,
14370                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
14371                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
14372       }
14373     }
14374 
14375     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
14376     //   -> (fma (fneg (fpext y)), (fpext z),
14377     //           (fma (fneg (fpext u)), (fpext v), x))
14378     // FIXME: This turns two single-precision and one double-precision
14379     // operation into two double-precision operations, which might not be
14380     // interesting for all targets, especially GPUs.
14381     if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
14382       SDValue CvtSrc = N1.getOperand(0);
14383       SDValue N100 = CvtSrc.getOperand(0);
14384       SDValue N101 = CvtSrc.getOperand(1);
14385       SDValue N102 = CvtSrc.getOperand(2);
14386       if (isContractableAndReassociableFMUL(N102) &&
14387           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
14388                               CvtSrc.getValueType())) {
14389         SDValue N1020 = N102.getOperand(0);
14390         SDValue N1021 = N102.getOperand(1);
14391         return DAG.getNode(
14392             PreferredFusedOpcode, SL, VT,
14393             DAG.getNode(ISD::FNEG, SL, VT,
14394                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
14395             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
14396             DAG.getNode(PreferredFusedOpcode, SL, VT,
14397                         DAG.getNode(ISD::FNEG, SL, VT,
14398                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
14399                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
14400       }
14401     }
14402   }
14403 
14404   return SDValue();
14405 }
14406 
14407 /// Try to perform FMA combining on a given FMUL node based on the distributive
14408 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
14409 /// subtraction instead of addition).
14410 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
14411   SDValue N0 = N->getOperand(0);
14412   SDValue N1 = N->getOperand(1);
14413   EVT VT = N->getValueType(0);
14414   SDLoc SL(N);
14415 
14416   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
14417 
14418   const TargetOptions &Options = DAG.getTarget().Options;
14419 
14420   // The transforms below are incorrect when x == 0 and y == inf, because the
  // intermediate multiplication produces a NaN.
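  // For example, with x == 0.0 and y == inf:
  //   (x + 1.0) * y  ==  1.0 * inf        ==  inf, but
  //   fma(x, y, y)   ==  0.0 * inf + inf  ==  NaN.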
14422   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
14423   if (!hasNoInfs(Options, FAdd))
14424     return SDValue();
14425 
14426   // Floating-point multiply-add without intermediate rounding.
14427   bool HasFMA =
14428       isContractableFMUL(Options, SDValue(N, 0)) &&
14429       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
14430       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
14431 
14432   // Floating-point multiply-add with intermediate rounding. This can result
14433   // in a less precise result due to the changed rounding order.
14434   bool HasFMAD = Options.UnsafeFPMath &&
14435                  (LegalOperations && TLI.isFMADLegal(DAG, N));
14436 
14437   // No valid opcode, do not combine.
14438   if (!HasFMAD && !HasFMA)
14439     return SDValue();
14440 
  // Always prefer FMAD to FMA: FMAD's intermediate rounding matches the
  // separate FMUL and FADD, so fusing with it preserves the original result.
14442   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
14443   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
14444 
14445   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
14446   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
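  // Both folds follow from distributing the multiply:
  // (x0 +/- 1.0) * y == (x0 * y) +/- y.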
14447   auto FuseFADD = [&](SDValue X, SDValue Y) {
14448     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
14449       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
14450         if (C->isExactlyValue(+1.0))
14451           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14452                              Y);
14453         if (C->isExactlyValue(-1.0))
14454           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14455                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14456       }
14457     }
14458     return SDValue();
14459   };
14460 
14461   if (SDValue FMA = FuseFADD(N0, N1))
14462     return FMA;
14463   if (SDValue FMA = FuseFADD(N1, N0))
14464     return FMA;
14465 
14466   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
14467   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
14468   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
14469   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
14470   auto FuseFSUB = [&](SDValue X, SDValue Y) {
14471     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
14472       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
14473         if (C0->isExactlyValue(+1.0))
14474           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14475                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14476                              Y);
14477         if (C0->isExactlyValue(-1.0))
14478           return DAG.getNode(PreferredFusedOpcode, SL, VT,
14479                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
14480                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14481       }
14482       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
14483         if (C1->isExactlyValue(+1.0))
14484           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14485                              DAG.getNode(ISD::FNEG, SL, VT, Y));
14486         if (C1->isExactlyValue(-1.0))
14487           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
14488                              Y);
14489       }
14490     }
14491     return SDValue();
14492   };
14493 
14494   if (SDValue FMA = FuseFSUB(N0, N1))
14495     return FMA;
14496   if (SDValue FMA = FuseFSUB(N1, N0))
14497     return FMA;
14498 
14499   return SDValue();
14500 }
14501 
14502 SDValue DAGCombiner::visitFADD(SDNode *N) {
14503   SDValue N0 = N->getOperand(0);
14504   SDValue N1 = N->getOperand(1);
14505   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14506   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14507   EVT VT = N->getValueType(0);
14508   SDLoc DL(N);
14509   const TargetOptions &Options = DAG.getTarget().Options;
14510   SDNodeFlags Flags = N->getFlags();
14511   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14512 
14513   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14514     return R;
14515 
14516   // fold (fadd c1, c2) -> c1 + c2
14517   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
14518     return C;
14519 
14520   // canonicalize constant to RHS
14521   if (N0CFP && !N1CFP)
14522     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
14523 
14524   // fold vector ops
14525   if (VT.isVector())
14526     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14527       return FoldedVOp;
14528 
14529   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
14530   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
14531   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
14533       return N0;
14534 
14535   if (SDValue NewSel = foldBinOpIntoSelect(N))
14536     return NewSel;
14537 
14538   // fold (fadd A, (fneg B)) -> (fsub A, B)
14539   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14540     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14541             N1, DAG, LegalOperations, ForCodeSize))
14542       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
14543 
14544   // fold (fadd (fneg A), B) -> (fsub B, A)
14545   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
14546     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14547             N0, DAG, LegalOperations, ForCodeSize))
14548       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
14549 
14550   auto isFMulNegTwo = [](SDValue FMul) {
14551     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
14552       return false;
14553     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
14554     return C && C->isExactlyValue(-2.0);
14555   };
14556 
14557   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
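  // (Negation and doubling are both exact in binary floating point, so this
  // rewrite preserves the value for all inputs, including infinities.)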
14558   if (isFMulNegTwo(N0)) {
14559     SDValue B = N0.getOperand(0);
14560     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14561     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
14562   }
14563   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
14564   if (isFMulNegTwo(N1)) {
14565     SDValue B = N1.getOperand(0);
14566     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
14567     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
14568   }
14569 
  // No FP constant should be created after legalization, as the instruction
  // selection pass has a hard time dealing with FP constants.
14572   bool AllowNewConst = (Level < AfterLegalizeDAG);
14573 
14574   // If nnan is enabled, fold lots of things.
14575   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
14576     // If allowed, fold (fadd (fneg x), x) -> 0.0
14577     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
14578       return DAG.getConstantFP(0.0, DL, VT);
14579 
14580     // If allowed, fold (fadd x, (fneg x)) -> 0.0
14581     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
14582       return DAG.getConstantFP(0.0, DL, VT);
14583   }
14584 
  // If 'unsafe math' is enabled, or both the reassoc and nsz flags are set,
  // fold lots of things.
  // TODO: Break out the portions of the transformations below that only rely
  //       on Unsafe and do not actually require both nsz and reassoc.
14588   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14589        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14590       AllowNewConst) {
14591     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
14592     if (N1CFP && N0.getOpcode() == ISD::FADD &&
14593         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14594       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
14595       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
14596     }
14597 
14598     // We can fold chains of FADD's of the same value into multiplications.
14599     // This transform is not safe in general because we are reducing the number
14600     // of rounding steps.
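    // For example, (fadd (fmul x, c), x) -> (fmul x, c+1) replaces
    // round(round(x*c) + x) with round(x * round(c+1)), which need not be
    // equal.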
14601     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
14602       if (N0.getOpcode() == ISD::FMUL) {
14603         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14604         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
14605 
14606         // (fadd (fmul x, c), x) -> (fmul x, c+1)
14607         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
14608           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14609                                        DAG.getConstantFP(1.0, DL, VT));
14610           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
14611         }
14612 
14613         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
14614         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
14615             N1.getOperand(0) == N1.getOperand(1) &&
14616             N0.getOperand(0) == N1.getOperand(0)) {
14617           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
14618                                        DAG.getConstantFP(2.0, DL, VT));
14619           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
14620         }
14621       }
14622 
14623       if (N1.getOpcode() == ISD::FMUL) {
14624         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14625         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
14626 
14627         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
14628         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
14629           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14630                                        DAG.getConstantFP(1.0, DL, VT));
14631           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
14632         }
14633 
14634         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
14635         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
14636             N0.getOperand(0) == N0.getOperand(1) &&
14637             N1.getOperand(0) == N0.getOperand(0)) {
14638           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
14639                                        DAG.getConstantFP(2.0, DL, VT));
14640           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
14641         }
14642       }
14643 
14644       if (N0.getOpcode() == ISD::FADD) {
14645         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
14646         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
14647         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
14648             (N0.getOperand(0) == N1)) {
14649           return DAG.getNode(ISD::FMUL, DL, VT, N1,
14650                              DAG.getConstantFP(3.0, DL, VT));
14651         }
14652       }
14653 
14654       if (N1.getOpcode() == ISD::FADD) {
14655         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
14656         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
14657         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
14658             N1.getOperand(0) == N0) {
14659           return DAG.getNode(ISD::FMUL, DL, VT, N0,
14660                              DAG.getConstantFP(3.0, DL, VT));
14661         }
14662       }
14663 
14664       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
14665       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
14666           N0.getOperand(0) == N0.getOperand(1) &&
14667           N1.getOperand(0) == N1.getOperand(1) &&
14668           N0.getOperand(0) == N1.getOperand(0)) {
14669         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
14670                            DAG.getConstantFP(4.0, DL, VT));
14671       }
14672     }
14673   } // enable-unsafe-fp-math
14674 
14675   // FADD -> FMA combines:
14676   if (SDValue Fused = visitFADDForFMACombine(N)) {
14677     AddToWorklist(Fused.getNode());
14678     return Fused;
14679   }
14680   return SDValue();
14681 }
14682 
14683 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
14684   SDValue Chain = N->getOperand(0);
14685   SDValue N0 = N->getOperand(1);
14686   SDValue N1 = N->getOperand(2);
14687   EVT VT = N->getValueType(0);
14688   EVT ChainVT = N->getValueType(1);
14689   SDLoc DL(N);
14690   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14691 
14692   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
14693   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14694     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
14695             N1, DAG, LegalOperations, ForCodeSize)) {
14696       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14697                          {Chain, N0, NegN1});
14698     }
14699 
14700   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
14701   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
14702     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
14703             N0, DAG, LegalOperations, ForCodeSize)) {
14704       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
14705                          {Chain, N1, NegN0});
14706     }
14707   return SDValue();
14708 }
14709 
14710 SDValue DAGCombiner::visitFSUB(SDNode *N) {
14711   SDValue N0 = N->getOperand(0);
14712   SDValue N1 = N->getOperand(1);
14713   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
14714   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14715   EVT VT = N->getValueType(0);
14716   SDLoc DL(N);
14717   const TargetOptions &Options = DAG.getTarget().Options;
14718   const SDNodeFlags Flags = N->getFlags();
14719   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14720 
14721   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14722     return R;
14723 
14724   // fold (fsub c1, c2) -> c1-c2
14725   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
14726     return C;
14727 
14728   // fold vector ops
14729   if (VT.isVector())
14730     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14731       return FoldedVOp;
14732 
14733   if (SDValue NewSel = foldBinOpIntoSelect(N))
14734     return NewSel;
14735 
14736   // (fsub A, 0) -> A
14737   if (N1CFP && N1CFP->isZero()) {
14738     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
14739         Flags.hasNoSignedZeros()) {
14740       return N0;
14741     }
14742   }
14743 
14744   if (N0 == N1) {
14745     // (fsub x, x) -> 0.0
14746     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
14747       return DAG.getConstantFP(0.0f, DL, VT);
14748   }
14749 
  // (fsub -0.0, N1) -> -N1 (also allowed with +0.0 if nsz)
14751   if (N0CFP && N0CFP->isZero()) {
14752     if (N0CFP->isNegative() ||
14753         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
14754       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
14755       // flushed to zero, unless all users treat denorms as zero (DAZ).
14756       // FIXME: This transform will change the sign of a NaN and the behavior
14757       // of a signaling NaN. It is only valid when a NoNaN flag is present.
14758       DenormalMode DenormMode = DAG.getDenormalMode(VT);
14759       if (DenormMode == DenormalMode::getIEEE()) {
14760         if (SDValue NegN1 =
14761                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14762           return NegN1;
14763         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14764           return DAG.getNode(ISD::FNEG, DL, VT, N1);
14765       }
14766     }
14767   }
14768 
14769   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
14770        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
14771       N1.getOpcode() == ISD::FADD) {
14772     // X - (X + Y) -> -Y
14773     if (N0 == N1->getOperand(0))
14774       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
14775     // X - (Y + X) -> -Y
14776     if (N0 == N1->getOperand(1))
14777       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
14778   }
14779 
14780   // fold (fsub A, (fneg B)) -> (fadd A, B)
14781   if (SDValue NegN1 =
14782           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
14783     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
14784 
14785   // FSUB -> FMA combines:
14786   if (SDValue Fused = visitFSUBForFMACombine(N)) {
14787     AddToWorklist(Fused.getNode());
14788     return Fused;
14789   }
14790 
14791   return SDValue();
14792 }
14793 
14794 SDValue DAGCombiner::visitFMUL(SDNode *N) {
14795   SDValue N0 = N->getOperand(0);
14796   SDValue N1 = N->getOperand(1);
14797   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
14798   EVT VT = N->getValueType(0);
14799   SDLoc DL(N);
14800   const TargetOptions &Options = DAG.getTarget().Options;
14801   const SDNodeFlags Flags = N->getFlags();
14802   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14803 
14804   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14805     return R;
14806 
14807   // fold (fmul c1, c2) -> c1*c2
14808   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
14809     return C;
14810 
14811   // canonicalize constant to RHS
14812   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14813      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14814     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
14815 
14816   // fold vector ops
14817   if (VT.isVector())
14818     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
14819       return FoldedVOp;
14820 
14821   if (SDValue NewSel = foldBinOpIntoSelect(N))
14822     return NewSel;
14823 
14824   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
14825     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
14826     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14827         N0.getOpcode() == ISD::FMUL) {
14828       SDValue N00 = N0.getOperand(0);
14829       SDValue N01 = N0.getOperand(1);
14830       // Avoid an infinite loop by making sure that N00 is not a constant
14831       // (the inner multiply has not been constant folded yet).
14832       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
14833           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
14834         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
14835         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
14836       }
14837     }
14838 
    // Match a special case: we convert X * 2.0 into fadd.
14840     // fmul (fadd X, X), C -> fmul X, 2.0 * C
14841     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
14842         N0.getOperand(0) == N0.getOperand(1)) {
14843       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
14844       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
14845       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
14846     }
14847   }
14848 
14849   // fold (fmul X, 2.0) -> (fadd X, X)
14850   if (N1CFP && N1CFP->isExactlyValue(+2.0))
14851     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
14852 
14853   // fold (fmul X, -1.0) -> (fsub -0.0, X)
14854   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
14855     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
14856       return DAG.getNode(ISD::FSUB, DL, VT,
14857                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
14858     }
14859   }
14860 
14861   // -N0 * -N1 --> N0 * N1
14862   TargetLowering::NegatibleCost CostN0 =
14863       TargetLowering::NegatibleCost::Expensive;
14864   TargetLowering::NegatibleCost CostN1 =
14865       TargetLowering::NegatibleCost::Expensive;
14866   SDValue NegN0 =
14867       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14868   SDValue NegN1 =
14869       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14870   if (NegN0 && NegN1 &&
14871       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14872        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14873     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
14874 
14875   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
14876   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
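  // nsz is needed because X == +0.0 takes the 1.0 arm of the select, giving
  // +0.0, while (fneg (fabs X)) would give -0.0; nnan similarly sidesteps
  // reasoning about the sign of a NaN result.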
14877   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
14878       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
14879       TLI.isOperationLegal(ISD::FABS, VT)) {
14880     SDValue Select = N0, X = N1;
14881     if (Select.getOpcode() != ISD::SELECT)
14882       std::swap(Select, X);
14883 
14884     SDValue Cond = Select.getOperand(0);
14885     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
14886     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
14887 
14888     if (TrueOpnd && FalseOpnd &&
14889         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
14890         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
14891         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
14892       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
14893       switch (CC) {
14894       default: break;
14895       case ISD::SETOLT:
14896       case ISD::SETULT:
14897       case ISD::SETOLE:
14898       case ISD::SETULE:
14899       case ISD::SETLT:
14900       case ISD::SETLE:
14901         std::swap(TrueOpnd, FalseOpnd);
14902         LLVM_FALLTHROUGH;
14903       case ISD::SETOGT:
14904       case ISD::SETUGT:
14905       case ISD::SETOGE:
14906       case ISD::SETUGE:
14907       case ISD::SETGT:
14908       case ISD::SETGE:
14909         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
14910             TLI.isOperationLegal(ISD::FNEG, VT))
14911           return DAG.getNode(ISD::FNEG, DL, VT,
14912                    DAG.getNode(ISD::FABS, DL, VT, X));
14913         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
14914           return DAG.getNode(ISD::FABS, DL, VT, X);
14915 
14916         break;
14917       }
14918     }
14919   }
14920 
14921   // FMUL -> FMA combines:
14922   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
14923     AddToWorklist(Fused.getNode());
14924     return Fused;
14925   }
14926 
14927   return SDValue();
14928 }
14929 
14930 SDValue DAGCombiner::visitFMA(SDNode *N) {
14931   SDValue N0 = N->getOperand(0);
14932   SDValue N1 = N->getOperand(1);
14933   SDValue N2 = N->getOperand(2);
14934   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14935   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14936   EVT VT = N->getValueType(0);
14937   SDLoc DL(N);
14938   const TargetOptions &Options = DAG.getTarget().Options;
14939   // FMA nodes have flags that propagate to the created nodes.
14940   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14941 
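  // Per-node reassociation permission is treated like global unsafe math for
  // the folds below.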
14942   bool UnsafeFPMath =
14943       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
14944 
14945   // Constant fold FMA.
14946   if (isa<ConstantFPSDNode>(N0) &&
14947       isa<ConstantFPSDNode>(N1) &&
14948       isa<ConstantFPSDNode>(N2)) {
14949     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
14950   }
14951 
14952   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
14953   TargetLowering::NegatibleCost CostN0 =
14954       TargetLowering::NegatibleCost::Expensive;
14955   TargetLowering::NegatibleCost CostN1 =
14956       TargetLowering::NegatibleCost::Expensive;
14957   SDValue NegN0 =
14958       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14959   SDValue NegN1 =
14960       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14961   if (NegN0 && NegN1 &&
14962       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14963        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14964     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
14965 
14966   if (UnsafeFPMath) {
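    // fma(0, y, z) == z requires unsafe math: 0 * inf is NaN rather than 0,
    // and with z == -0.0 the true product-sum is +0.0, not z.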
14967     if (N0CFP && N0CFP->isZero())
14968       return N2;
14969     if (N1CFP && N1CFP->isZero())
14970       return N2;
14971   }
14972 
14973   if (N0CFP && N0CFP->isExactlyValue(1.0))
14974     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
14975   if (N1CFP && N1CFP->isExactlyValue(1.0))
14976     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
14977 
14978   // Canonicalize (fma c, x, y) -> (fma x, c, y)
14979   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14980      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14981     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
14982 
14983   if (UnsafeFPMath) {
14984     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
14985     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
14986         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14987         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
14988       return DAG.getNode(ISD::FMUL, DL, VT, N0,
14989                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
14990     }
14991 
14992     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
14993     if (N0.getOpcode() == ISD::FMUL &&
14994         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
14995         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
14996       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
14997                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
14998                          N2);
14999     }
15000   }
15001 
  // (fma x, 1.0, y) -> (fadd x, y)
  // (fma x, -1.0, y) -> (fadd (fneg x), y)
15003   if (N1CFP) {
15004     if (N1CFP->isExactlyValue(1.0))
15005       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
15006 
15007     if (N1CFP->isExactlyValue(-1.0) &&
15008         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
15009       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
15010       AddToWorklist(RHSNeg.getNode());
15011       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
15012     }
15013 
    // fma (fneg x), K, y -> fma x, -K, y
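    // Profitable when FP constants are free to materialize, or when K is not
    // a legal immediate anyway and has no other users, so loading -K instead
    // of K costs nothing extra.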
15015     if (N0.getOpcode() == ISD::FNEG &&
15016         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15017          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
15018                                               ForCodeSize)))) {
15019       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
15020                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
15021     }
15022   }
15023 
15024   if (UnsafeFPMath) {
15025     // (fma x, c, x) -> (fmul x, (c+1))
15026     if (N1CFP && N0 == N2) {
15027       return DAG.getNode(
15028           ISD::FMUL, DL, VT, N0,
15029           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
15030     }
15031 
15032     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
15033     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
15034       return DAG.getNode(
15035           ISD::FMUL, DL, VT, N0,
15036           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
15037     }
15038   }
15039 
  // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
  // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
15042   if (!TLI.isFNegFree(VT))
15043     if (SDValue Neg = TLI.getCheaperNegatedExpression(
15044             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
15045       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
15046   return SDValue();
15047 }
15048 
15049 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
15050 // reciprocal.
15051 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
15052 // Notice that this is not always beneficial. One reason is different targets
15053 // may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is that the critical path is increased from "one FDIV" to "one FDIV + one
// FMUL".
15056 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
15057   // TODO: Limit this transform based on optsize/minsize - it always creates at
15058   //       least 1 extra instruction. But the perf win may be substantial enough
15059   //       that only minsize should restrict this.
15060   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
15061   const SDNodeFlags Flags = N->getFlags();
15062   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
15063     return SDValue();
15064 
15065   // Skip if current node is a reciprocal/fneg-reciprocal.
15066   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
15067   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
15068   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
15069     return SDValue();
15070 
15071   // Exit early if the target does not want this transform or if there can't
15072   // possibly be enough uses of the divisor to make the transform worthwhile.
15073   unsigned MinUses = TLI.combineRepeatedFPDivisors();
15074 
15075   // For splat vectors, scale the number of uses by the splat factor. If we can
15076   // convert the division into a scalar op, that will likely be much faster.
15077   unsigned NumElts = 1;
15078   EVT VT = N->getValueType(0);
15079   if (VT.isVector() && DAG.isSplatValue(N1))
15080     NumElts = VT.getVectorMinNumElements();
15081 
15082   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
15083     return SDValue();
15084 
15085   // Find all FDIV users of the same divisor.
15086   // Use a set because duplicates may be present in the user list.
15087   SetVector<SDNode *> Users;
15088   for (auto *U : N1->uses()) {
15089     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
15090       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
15091       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
15092           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
15093           U->getFlags().hasAllowReassociation() &&
15094           U->getFlags().hasNoSignedZeros())
15095         continue;
15096 
15097       // This division is eligible for optimization only if global unsafe math
15098       // is enabled or if this division allows reciprocal formation.
15099       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
15100         Users.insert(U);
15101     }
15102   }
15103 
15104   // Now that we have the actual number of divisor uses, make sure it meets
15105   // the minimum threshold specified by the target.
15106   if ((Users.size() * NumElts) < MinUses)
15107     return SDValue();
15108 
15109   SDLoc DL(N);
15110   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15111   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
15112 
15113   // Dividend / Divisor -> Dividend * Reciprocal
15114   for (auto *U : Users) {
15115     SDValue Dividend = U->getOperand(0);
15116     if (Dividend != FPOne) {
15117       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
15118                                     Reciprocal, Flags);
15119       CombineTo(U, NewNode);
15120     } else if (U != Reciprocal.getNode()) {
15121       // In the absence of fast-math-flags, this user node is always the
15122       // same node as Reciprocal, but with FMF they may be different nodes.
15123       CombineTo(U, Reciprocal);
15124     }
15125   }
15126   return SDValue(N, 0);  // N was replaced.
15127 }
15128 
15129 SDValue DAGCombiner::visitFDIV(SDNode *N) {
15130   SDValue N0 = N->getOperand(0);
15131   SDValue N1 = N->getOperand(1);
15132   EVT VT = N->getValueType(0);
15133   SDLoc DL(N);
15134   const TargetOptions &Options = DAG.getTarget().Options;
15135   SDNodeFlags Flags = N->getFlags();
15136   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15137 
15138   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15139     return R;
15140 
15141   // fold (fdiv c1, c2) -> c1/c2
15142   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
15143     return C;
15144 
15145   // fold vector ops
15146   if (VT.isVector())
15147     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
15148       return FoldedVOp;
15149 
15150   if (SDValue NewSel = foldBinOpIntoSelect(N))
15151     return NewSel;
15152 
15153   if (SDValue V = combineRepeatedFPDivisors(N))
15154     return V;
15155 
15156   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
15157     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
15158     if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
15159       // Compute the reciprocal 1.0 / c2.
15160       const APFloat &N1APF = N1CFP->getValueAPF();
15161       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
15162       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
15163       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g. NaN, denormal, ...).
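      // For example, X/4.0 -> X*0.25 is exact (opOK), while X/3.0 ->
      // X*0.333... is inexact (opInexact) and is only acceptable because the
      // caller opted into reciprocal approximations.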
15165       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
15166           (!LegalOperations ||
15167            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
15168            // backend)... we should handle this gracefully after Legalize.
15169            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
15170            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
15171            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
15172         return DAG.getNode(ISD::FMUL, DL, VT, N0,
15173                            DAG.getConstantFP(Recip, DL, VT));
15174     }
15175 
15176     // If this FDIV is part of a reciprocal square root, it may be folded
15177     // into a target-specific square root estimate instruction.
15178     if (N1.getOpcode() == ISD::FSQRT) {
15179       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
15180         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15181     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
15182                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15183       if (SDValue RV =
15184               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15185         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
15186         AddToWorklist(RV.getNode());
15187         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15188       }
15189     } else if (N1.getOpcode() == ISD::FP_ROUND &&
15190                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15191       if (SDValue RV =
15192               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
15193         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
15194         AddToWorklist(RV.getNode());
15195         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
15196       }
15197     } else if (N1.getOpcode() == ISD::FMUL) {
15198       // Look through an FMUL. Even though this won't remove the FDIV directly,
15199       // it's still worthwhile to get rid of the FSQRT if possible.
15200       SDValue Sqrt, Y;
15201       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
15202         Sqrt = N1.getOperand(0);
15203         Y = N1.getOperand(1);
15204       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
15205         Sqrt = N1.getOperand(1);
15206         Y = N1.getOperand(0);
15207       }
15208       if (Sqrt.getNode()) {
15209         // If the other multiply operand is known positive, pull it into the
15210         // sqrt. That will eliminate the division if we convert to an estimate.
15211         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
15212             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
15213           SDValue A;
15214           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
15215             A = Y.getOperand(0);
15216           else if (Y == Sqrt.getOperand(0))
15217             A = Y;
15218           if (A) {
15219             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
15220             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
15221             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
15222             SDValue AAZ =
15223                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
15224             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
15225               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
15226 
15227             // Estimate creation failed. Clean up speculatively created nodes.
15228             recursivelyDeleteUnusedNodes(AAZ.getNode());
15229           }
15230         }
15231 
        // We found an FSQRT, so try to make this fold:
15233         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
15234         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
15235           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
15236           AddToWorklist(Div.getNode());
15237           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
15238         }
15239       }
15240     }
15241 
15242     // Fold into a reciprocal estimate and multiply instead of a real divide.
15243     if (Options.NoInfsFPMath || Flags.hasNoInfs())
15244       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
15245         return RV;
15246   }
15247 
15248   // Fold X/Sqrt(X) -> Sqrt(X)
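  // (Mathematically X / sqrt(X) == sqrt(X) for X > 0; the flags excuse the
  // X == 0 and X == inf cases, where the division yields NaN instead.)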
15249   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
15250       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
15251     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
15252       return N1;
15253 
15254   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
15255   TargetLowering::NegatibleCost CostN0 =
15256       TargetLowering::NegatibleCost::Expensive;
15257   TargetLowering::NegatibleCost CostN1 =
15258       TargetLowering::NegatibleCost::Expensive;
15259   SDValue NegN0 =
15260       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
15261   SDValue NegN1 =
15262       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
15263   if (NegN0 && NegN1 &&
15264       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
15265        CostN1 == TargetLowering::NegatibleCost::Cheaper))
15266     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
15267 
15268   return SDValue();
15269 }
15270 
15271 SDValue DAGCombiner::visitFREM(SDNode *N) {
15272   SDValue N0 = N->getOperand(0);
15273   SDValue N1 = N->getOperand(1);
15274   EVT VT = N->getValueType(0);
15275   SDNodeFlags Flags = N->getFlags();
15276   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15277 
15278   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
15279     return R;
15280 
15281   // fold (frem c1, c2) -> fmod(c1,c2)
15282   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
15283     return C;
15284 
15285   if (SDValue NewSel = foldBinOpIntoSelect(N))
15286     return NewSel;
15287 
15288   return SDValue();
15289 }
15290 
15291 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
15292   SDNodeFlags Flags = N->getFlags();
15293   const TargetOptions &Options = DAG.getTarget().Options;
15294 
15295   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
15296   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
15297   if (!Flags.hasApproximateFuncs() ||
15298       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
15299     return SDValue();
15300 
15301   SDValue N0 = N->getOperand(0);
15302   if (TLI.isFsqrtCheap(N0, DAG))
15303     return SDValue();
15304 
15305   // FSQRT nodes have flags that propagate to the created nodes.
15306   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
15307   //       transform the fdiv, we may produce a sub-optimal estimate sequence
15308   //       because the reciprocal calculation may not have to filter out a
15309   //       0.0 input.
15310   return buildSqrtEstimate(N0, Flags);
15311 }
15312 
15313 /// copysign(x, fp_extend(y)) -> copysign(x, y)
15314 /// copysign(x, fp_round(y)) -> copysign(x, y)
15315 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
15316   SDValue N1 = N->getOperand(1);
15317   if ((N1.getOpcode() == ISD::FP_EXTEND ||
15318        N1.getOpcode() == ISD::FP_ROUND)) {
15319     EVT N1VT = N1->getValueType(0);
15320     EVT N1Op0VT = N1->getOperand(0).getValueType();
15321 
15322     // Always fold no-op FP casts.
15323     if (N1VT == N1Op0VT)
15324       return true;
15325 
15326     // Do not optimize out type conversion of f128 type yet.
15327     // For some targets like x86_64, configuration is changed to keep one f128
15328     // value in one SSE register, but instruction selection cannot handle
15329     // FCOPYSIGN on SSE registers yet.
15330     if (N1Op0VT == MVT::f128)
15331       return false;
15332 
15333     // Avoid mismatched vector operand types, for better instruction selection.
15334     if (N1Op0VT.isVector())
15335       return false;
15336 
15337     return true;
15338   }
15339   return false;
15340 }
15341 
15342 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
15343   SDValue N0 = N->getOperand(0);
15344   SDValue N1 = N->getOperand(1);
15345   EVT VT = N->getValueType(0);
15346 
15347   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
15348   if (SDValue C =
15349           DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
15350     return C;
15351 
15352   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
15353     const APFloat &V = N1C->getValueAPF();
15354     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
15355     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
15356     if (!V.isNegative()) {
15357       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
15358         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15359     } else {
15360       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
15361         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
15362                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
15363     }
15364   }
15365 
15366   // copysign(fabs(x), y) -> copysign(x, y)
15367   // copysign(fneg(x), y) -> copysign(x, y)
15368   // copysign(copysign(x,z), y) -> copysign(x, y)
15369   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
15370       N0.getOpcode() == ISD::FCOPYSIGN)
15371     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
15372 
15373   // copysign(x, abs(y)) -> abs(x)
15374   if (N1.getOpcode() == ISD::FABS)
15375     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15376 
15377   // copysign(x, copysign(y,z)) -> copysign(x, z)
15378   if (N1.getOpcode() == ISD::FCOPYSIGN)
15379     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
15380 
15381   // copysign(x, fp_extend(y)) -> copysign(x, y)
15382   // copysign(x, fp_round(y)) -> copysign(x, y)
15383   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
15384     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
15385 
15386   return SDValue();
15387 }
15388 
15389 SDValue DAGCombiner::visitFPOW(SDNode *N) {
15390   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
15391   if (!ExponentC)
15392     return SDValue();
15393   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15394 
15395   // Try to convert x ** (1/3) into cube root.
15396   // TODO: Handle the various flavors of long double.
15397   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
15398   //       Some range near 1/3 should be fine.
15399   EVT VT = N->getValueType(0);
15400   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
15401       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
15402     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
15403     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
15405     // For regular numbers, rounding may cause the results to differ.
15406     // Therefore, we require { nsz ninf nnan afn } for this transform.
15407     // TODO: We could select out the special cases if we don't have nsz/ninf.
15408     SDNodeFlags Flags = N->getFlags();
15409     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
15410         !Flags.hasApproximateFuncs())
15411       return SDValue();
15412 
15413     // Do not create a cbrt() libcall if the target does not have it, and do not
15414     // turn a pow that has lowering support into a cbrt() libcall.
15415     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
15416         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
15417          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
15418       return SDValue();
15419 
15420     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
15421   }
15422 
15423   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
15424   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
15425   // TODO: This could be extended (using a target hook) to handle smaller
15426   // power-of-2 fractional exponents.
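  // Note that x**(3/4) = x**(1/2) * x**(1/4) = sqrt(x) * sqrt(sqrt(x)), so
  // both exponents can share the pair of sqrt nodes built below.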
15427   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
15428   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
15429   if (ExponentIs025 || ExponentIs075) {
15430     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
15431     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
15432     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
15433     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
15434     // For regular numbers, rounding may cause the results to differ.
15435     // Therefore, we require { nsz ninf afn } for this transform.
15436     // TODO: We could select out the special cases if we don't have nsz/ninf.
15437     SDNodeFlags Flags = N->getFlags();
15438 
15439     // We only need no signed zeros for the 0.25 case.
15440     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
15441         !Flags.hasApproximateFuncs())
15442       return SDValue();
15443 
15444     // Don't double the number of libcalls. We are trying to inline fast code.
15445     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
15446       return SDValue();
15447 
15448     // Assume that libcalls are the smallest code.
15449     // TODO: This restriction should probably be lifted for vectors.
15450     if (ForCodeSize)
15451       return SDValue();
15452 
15453     // pow(X, 0.25) --> sqrt(sqrt(X))
15454     SDLoc DL(N);
15455     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
15456     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
15457     if (ExponentIs025)
15458       return SqrtSqrt;
15459     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
15460     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
15461   }
15462 
15463   return SDValue();
15464 }
15465 
15466 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
15467                                const TargetLowering &TLI) {
15468   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
15469   // replacing casts with a libcall. We also must be allowed to ignore -0.0
15470   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
15471   // conversions would return +0.0.
15472   // FIXME: We should be able to use node-level FMF here.
15473   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
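  // For example, with X = -0.25: (sitofp (fptosi X)) yields +0.0, while
  // ftrunc(-0.25) yields -0.0, so the fold is only sound when the sign of
  // zero may be ignored.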
15474   EVT VT = N->getValueType(0);
15475   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
15476       !DAG.getTarget().Options.NoSignedZerosFPMath)
15477     return SDValue();
15478 
15479   // fptosi/fptoui round towards zero, so converting from FP to integer and
15480   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
15481   SDValue N0 = N->getOperand(0);
15482   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
15483       N0.getOperand(0).getValueType() == VT)
15484     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15485 
15486   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
15487       N0.getOperand(0).getValueType() == VT)
15488     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
15489 
15490   return SDValue();
15491 }
15492 
15493 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
15494   SDValue N0 = N->getOperand(0);
15495   EVT VT = N->getValueType(0);
15496   EVT OpVT = N0.getValueType();
15497 
15498   // [us]itofp(undef) = 0, because the result value is bounded.
15499   if (N0.isUndef())
15500     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15501 
15502   // fold (sint_to_fp c1) -> c1fp
15503   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15504       // ...but only if the target supports immediate floating-point values
15505       (!LegalOperations ||
15506        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15507     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15508 
15509   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
15510   // but UINT_TO_FP is legal on this target, try to convert.
15511   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
15512       hasOperation(ISD::UINT_TO_FP, OpVT)) {
15513     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
15514     if (DAG.SignBitIsZero(N0))
15515       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15516   }
15517 
15518   // The next optimizations are desirable only if SELECT_CC can be lowered.
15519   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
15520   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
15521       !VT.isVector() &&
15522       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15523     SDLoc DL(N);
15524     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
15525                          DAG.getConstantFP(0.0, DL, VT));
15526   }
15527 
15528   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
15529   //      (select (setcc x, y, cc), 1.0, 0.0)
15530   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
15531       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
15532       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15533     SDLoc DL(N);
15534     return DAG.getSelect(DL, VT, N0.getOperand(0),
15535                          DAG.getConstantFP(1.0, DL, VT),
15536                          DAG.getConstantFP(0.0, DL, VT));
15537   }
15538 
15539   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15540     return FTrunc;
15541 
15542   return SDValue();
15543 }
15544 
15545 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
15546   SDValue N0 = N->getOperand(0);
15547   EVT VT = N->getValueType(0);
15548   EVT OpVT = N0.getValueType();
15549 
15550   // [us]itofp(undef) = 0, because the result value is bounded.
15551   if (N0.isUndef())
15552     return DAG.getConstantFP(0.0, SDLoc(N), VT);
15553 
15554   // fold (uint_to_fp c1) -> c1fp
15555   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
15556       // ...but only if the target supports immediate floating-point values
15557       (!LegalOperations ||
15558        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
15559     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
15560 
15561   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
15562   // but SINT_TO_FP is legal on this target, try to convert.
15563   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
15564       hasOperation(ISD::SINT_TO_FP, OpVT)) {
15565     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
15566     if (DAG.SignBitIsZero(N0))
15567       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
15568   }
15569 
15570   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
15571   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
15572       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
15573     SDLoc DL(N);
15574     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
15575                          DAG.getConstantFP(0.0, DL, VT));
15576   }
15577 
15578   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
15579     return FTrunc;
15580 
15581   return SDValue();
15582 }
15583 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
15585 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
15586   SDValue N0 = N->getOperand(0);
15587   EVT VT = N->getValueType(0);
15588 
15589   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
15590     return SDValue();
15591 
15592   SDValue Src = N0.getOperand(0);
15593   EVT SrcVT = Src.getValueType();
15594   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
15595   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
15596 
15597   // We can safely assume the conversion won't overflow the output range,
15598   // because (for example) (uint8_t)18293.f is undefined behavior.
15599 
15600   // Since we can assume the conversion won't overflow, our decision as to
15601   // whether the input will fit in the float should depend on the minimum
15602   // of the input range and output range.
15603 
15604   // This means this is also safe for a signed input and unsigned output, since
15605   // a negative input would lead to undefined behavior.
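
  // Worked example, assuming f32 (24-bit significand): for a signed i16
  // round-tripped through f32 back to i32, InputSize = 16 - 1 = 15 and
  // OutputSize = 32, so ActualSize = 15 <= 24 and the fold is safe (the
  // result is a sign-extend). For a signed i32 source, InputSize = 31 > 24,
  // so f32 cannot represent the whole input range exactly and we bail out.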
15606   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
15607   unsigned OutputSize = (int)VT.getScalarSizeInBits();
15608   unsigned ActualSize = std::min(InputSize, OutputSize);
15609   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
15610 
15611   // We can only fold away the float conversion if the input range can be
15612   // represented exactly in the float range.
15613   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
15614     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
15615       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
15616                                                        : ISD::ZERO_EXTEND;
15617       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
15618     }
15619     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
15620       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
15621     return DAG.getBitcast(VT, Src);
15622   }
15623   return SDValue();
15624 }
15625 
15626 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
15627   SDValue N0 = N->getOperand(0);
15628   EVT VT = N->getValueType(0);
15629 
15630   // fold (fp_to_sint undef) -> undef
15631   if (N0.isUndef())
15632     return DAG.getUNDEF(VT);
15633 
15634   // fold (fp_to_sint c1fp) -> c1
15635   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15636     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
15637 
15638   return FoldIntToFPToInt(N, DAG);
15639 }
15640 
15641 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
15642   SDValue N0 = N->getOperand(0);
15643   EVT VT = N->getValueType(0);
15644 
15645   // fold (fp_to_uint undef) -> undef
15646   if (N0.isUndef())
15647     return DAG.getUNDEF(VT);
15648 
15649   // fold (fp_to_uint c1fp) -> c1
15650   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15651     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
15652 
15653   return FoldIntToFPToInt(N, DAG);
15654 }
15655 
15656 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
15657   SDValue N0 = N->getOperand(0);
15658   SDValue N1 = N->getOperand(1);
15659   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
15660   EVT VT = N->getValueType(0);
15661 
15662   // fold (fp_round c1fp) -> c1fp
15663   if (N0CFP)
15664     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
15665 
15666   // fold (fp_round (fp_extend x)) -> x
15667   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
15668     return N0.getOperand(0);
15669 
15670   // fold (fp_round (fp_round x)) -> (fp_round x)
15671   if (N0.getOpcode() == ISD::FP_ROUND) {
15672     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
15673     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
15674 
15675     // Skip this folding if it results in an fp_round from f80 to f16.
15676     //
15677     // f80 to f16 always generates an expensive (and as yet, unimplemented)
15678     // libcall to __truncxfhf2 instead of selecting native f16 conversion
15679     // instructions from f32 or f64.  Moreover, the first (value-preserving)
15680     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
15681     // x86.
15682     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
15683       return SDValue();
15684 
15685     // If the first fp_round isn't a value preserving truncation, it might
15686     // introduce a tie in the second fp_round, that wouldn't occur in the
15687     // single-step fp_round we want to fold to.
15688     // In other words, double rounding isn't the same as rounding.
15689     // Also, this is a value preserving truncation iff both fp_round's are.
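    // Illustrative failure mode: the first (non-truncating) fp_round can land
    // exactly on a value that is a tie at the narrower precision; the second
    // fp_round then breaks that tie with round-to-even, which may differ from
    // rounding the original value directly in a single step.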
15690     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
15691       SDLoc DL(N);
15692       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
15693                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
15694     }
15695   }
15696 
15697   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
15698   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
15699     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
15700                               N0.getOperand(0), N1);
15701     AddToWorklist(Tmp.getNode());
15702     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
15703                        Tmp, N0.getOperand(1));
15704   }
15705 
15706   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15707     return NewVSel;
15708 
15709   return SDValue();
15710 }
15711 
15712 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
15713   SDValue N0 = N->getOperand(0);
15714   EVT VT = N->getValueType(0);
15715 
  // If this is fp_round(fp_extend), don't fold it; allow ourselves to be
  // folded.
15717   if (N->hasOneUse() &&
15718       N->use_begin()->getOpcode() == ISD::FP_ROUND)
15719     return SDValue();
15720 
15721   // fold (fp_extend c1fp) -> c1fp
15722   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15723     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
15724 
15725   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
15726   if (N0.getOpcode() == ISD::FP16_TO_FP &&
15727       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
15728     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
15729 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
15731   // value of X.
15732   if (N0.getOpcode() == ISD::FP_ROUND
15733       && N0.getConstantOperandVal(1) == 1) {
15734     SDValue In = N0.getOperand(0);
15735     if (In.getValueType() == VT) return In;
15736     if (VT.bitsLT(In.getValueType()))
15737       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
15738                          In, N0.getOperand(1));
15739     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
15740   }
15741 
15742   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
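  // The extending load already produces the wide value; the fp_round(..., 1)
  // created below only exists to hand any remaining users of the original
  // narrow load a value of the old type, and is value-preserving (TRUNC = 1).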
15743   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15744       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
15745     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15746     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
15747                                      LN0->getChain(),
15748                                      LN0->getBasePtr(), N0.getValueType(),
15749                                      LN0->getMemOperand());
15750     CombineTo(N, ExtLoad);
15751     CombineTo(N0.getNode(),
15752               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
15753                           N0.getValueType(), ExtLoad,
15754                           DAG.getIntPtrConstant(1, SDLoc(N0))),
15755               ExtLoad.getValue(1));
15756     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15757   }
15758 
15759   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
15760     return NewVSel;
15761 
15762   return SDValue();
15763 }
15764 
15765 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
15766   SDValue N0 = N->getOperand(0);
15767   EVT VT = N->getValueType(0);
15768 
15769   // fold (fceil c1) -> fceil(c1)
15770   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15771     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
15772 
15773   return SDValue();
15774 }
15775 
15776 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
15777   SDValue N0 = N->getOperand(0);
15778   EVT VT = N->getValueType(0);
15779 
15780   // fold (ftrunc c1) -> ftrunc(c1)
15781   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15782     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
15783 
15784   // fold ftrunc (known rounded int x) -> x
  // ftrunc is part of the fptosi/fptoui expansion on some targets, so this
  // pattern is likely to appear when extracting an integer from a rounded
  // floating-point value.
15787   switch (N0.getOpcode()) {
15788   default: break;
15789   case ISD::FRINT:
15790   case ISD::FTRUNC:
15791   case ISD::FNEARBYINT:
15792   case ISD::FFLOOR:
15793   case ISD::FCEIL:
15794     return N0;
15795   }
15796 
15797   return SDValue();
15798 }
15799 
15800 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
15801   SDValue N0 = N->getOperand(0);
15802   EVT VT = N->getValueType(0);
15803 
15804   // fold (ffloor c1) -> ffloor(c1)
15805   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15806     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
15807 
15808   return SDValue();
15809 }
15810 
15811 SDValue DAGCombiner::visitFNEG(SDNode *N) {
15812   SDValue N0 = N->getOperand(0);
15813   EVT VT = N->getValueType(0);
15814   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15815 
15816   // Constant fold FNEG.
15817   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15818     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
15819 
15820   if (SDValue NegN0 =
15821           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
15822     return NegN0;
15823 
15824   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
15825   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
15826   // know it was called from a context with a nsz flag if the input fsub does
15827   // not.
15828   if (N0.getOpcode() == ISD::FSUB &&
15829       (DAG.getTarget().Options.NoSignedZerosFPMath ||
15830        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
15831     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
15832                        N0.getOperand(0));
15833   }
15834 
15835   if (SDValue Cast = foldSignChangeInBitcast(N))
15836     return Cast;
15837 
15838   return SDValue();
15839 }
15840 
15841 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
15842   SDValue N0 = N->getOperand(0);
15843   SDValue N1 = N->getOperand(1);
15844   EVT VT = N->getValueType(0);
15845   const SDNodeFlags Flags = N->getFlags();
15846   unsigned Opc = N->getOpcode();
15847   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
15848   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
15849   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
15850 
15851   // Constant fold.
15852   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
15853     return C;
15854 
15855   // Canonicalize to constant on RHS.
15856   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
15857       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
15858     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
15859 
15860   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
15861     const APFloat &AF = N1CFP->getValueAPF();
15862 
15863     // minnum(X, nan) -> X
15864     // maxnum(X, nan) -> X
15865     // minimum(X, nan) -> nan
15866     // maximum(X, nan) -> nan
15867     if (AF.isNaN())
15868       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
15869 
15870     // In the following folds, inf can be replaced with the largest finite
15871     // float, if the ninf flag is set.
15872     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
15873       // minnum(X, -inf) -> -inf
15874       // maxnum(X, +inf) -> +inf
15875       // minimum(X, -inf) -> -inf if nnan
15876       // maximum(X, +inf) -> +inf if nnan
15877       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
15878         return N->getOperand(1);
15879 
15880       // minnum(X, +inf) -> X if nnan
15881       // maxnum(X, -inf) -> X if nnan
15882       // minimum(X, +inf) -> X
15883       // maximum(X, -inf) -> X
15884       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
15885         return N->getOperand(0);
15886     }
15887   }
15888 
15889   return SDValue();
15890 }
15891 
15892 SDValue DAGCombiner::visitFABS(SDNode *N) {
15893   SDValue N0 = N->getOperand(0);
15894   EVT VT = N->getValueType(0);
15895 
15896   // fold (fabs c1) -> fabs(c1)
15897   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
15898     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
15899 
15900   // fold (fabs (fabs x)) -> (fabs x)
15901   if (N0.getOpcode() == ISD::FABS)
15902     return N->getOperand(0);
15903 
15904   // fold (fabs (fneg x)) -> (fabs x)
15905   // fold (fabs (fcopysign x, y)) -> (fabs x)
15906   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
15907     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
15908 
15909   if (SDValue Cast = foldSignChangeInBitcast(N))
15910     return Cast;
15911 
15912   return SDValue();
15913 }
15914 
15915 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
15916   SDValue Chain = N->getOperand(0);
15917   SDValue N1 = N->getOperand(1);
15918   SDValue N2 = N->getOperand(2);
15919 
15920   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
15921   // nondeterministic jumps).
15922   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
15923     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
15924                        N1->getOperand(0), N2);
15925   }
15926 
15927   // If N is a constant we could fold this into a fallthrough or unconditional
15928   // branch. However that doesn't happen very often in normal code, because
15929   // Instcombine/SimplifyCFG should have handled the available opportunities.
15930   // If we did this folding here, it would be necessary to update the
15931   // MachineBasicBlock CFG, which is awkward.
15932 
15933   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
15934   // on the target.
15935   if (N1.getOpcode() == ISD::SETCC &&
15936       TLI.isOperationLegalOrCustom(ISD::BR_CC,
15937                                    N1.getOperand(0).getValueType())) {
15938     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15939                        Chain, N1.getOperand(2),
15940                        N1.getOperand(0), N1.getOperand(1), N2);
15941   }
15942 
15943   if (N1.hasOneUse()) {
15944     // rebuildSetCC calls visitXor which may change the Chain when there is a
15945     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
15946     HandleSDNode ChainHandle(Chain);
15947     if (SDValue NewN1 = rebuildSetCC(N1))
15948       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
15949                          ChainHandle.getValue(), NewN1, N2);
15950   }
15951 
15952   return SDValue();
15953 }
15954 
15955 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
15956   if (N.getOpcode() == ISD::SRL ||
15957       (N.getOpcode() == ISD::TRUNCATE &&
15958        (N.getOperand(0).hasOneUse() &&
15959         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
15961     if (N.getOpcode() == ISD::TRUNCATE)
15962       N = N.getOperand(0);
15963 
15964     // Match this pattern so that we can generate simpler code:
15965     //
15966     //   %a = ...
15967     //   %b = and i32 %a, 2
15968     //   %c = srl i32 %b, 1
15969     //   brcond i32 %c ...
15970     //
15971     // into
15972     //
15973     //   %a = ...
15974     //   %b = and i32 %a, 2
15975     //   %c = setcc eq %b, 0
15976     //   brcond %c ...
15977     //
15978     // This applies only when the AND constant value has one bit set and the
15979     // SRL constant is equal to the log2 of the AND constant. The back-end is
15980     // smart enough to convert the result into a TEST/JMP sequence.
15981     SDValue Op0 = N.getOperand(0);
15982     SDValue Op1 = N.getOperand(1);
15983 
15984     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
15985       SDValue AndOp1 = Op0.getOperand(1);
15986 
15987       if (AndOp1.getOpcode() == ISD::Constant) {
15988         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
15989 
15990         if (AndConst.isPowerOf2() &&
15991             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
15992           SDLoc DL(N);
15993           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
15994                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
15995                               ISD::SETNE);
15996         }
15997       }
15998     }
15999   }
16000 
16001   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
16002   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
16003   if (N.getOpcode() == ISD::XOR) {
16004     // Because we may call this on a speculatively constructed
16005     // SimplifiedSetCC Node, we need to simplify this node first.
16006     // Ideally this should be folded into SimplifySetCC and not
16007     // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
16009     HandleSDNode XORHandle(N);
16010     while (N.getOpcode() == ISD::XOR) {
16011       SDValue Tmp = visitXOR(N.getNode());
16012       // No simplification done.
16013       if (!Tmp.getNode())
16014         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
16017       if (Tmp.getNode() == N.getNode())
16018         N = XORHandle.getValue();
16019       else // Node simplified. Try simplifying again.
16020         N = Tmp;
16021     }
16022 
16023     if (N.getOpcode() != ISD::XOR)
16024       return N;
16025 
16026     SDValue Op0 = N->getOperand(0);
16027     SDValue Op1 = N->getOperand(1);
16028 
16029     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
16030       bool Equal = false;
16031       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
16032       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
16033           Op0.getValueType() == MVT::i1) {
16034         N = Op0;
16035         Op0 = N->getOperand(0);
16036         Op1 = N->getOperand(1);
16037         Equal = true;
16038       }
16039 
16040       EVT SetCCVT = N.getValueType();
16041       if (LegalTypes)
16042         SetCCVT = getSetCCResultType(SetCCVT);
16043       // Replace the uses of XOR with SETCC
16044       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
16045                           Equal ? ISD::SETEQ : ISD::SETNE);
16046     }
16047   }
16048 
16049   return SDValue();
16050 }
16051 
16052 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
16053 //
16054 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
16055   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
16056   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
16057 
16058   // If N is a constant we could fold this into a fallthrough or unconditional
16059   // branch. However that doesn't happen very often in normal code, because
16060   // Instcombine/SimplifyCFG should have handled the available opportunities.
16061   // If we did this folding here, it would be necessary to update the
16062   // MachineBasicBlock CFG, which is awkward.
16063 
16064   // Use SimplifySetCC to simplify SETCC's.
16065   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
16066                                CondLHS, CondRHS, CC->get(), SDLoc(N),
16067                                false);
16068   if (Simp.getNode()) AddToWorklist(Simp.getNode());
16069 
16070   // fold to a simpler setcc
16071   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
16072     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
16073                        N->getOperand(0), Simp.getOperand(2),
16074                        Simp.getOperand(0), Simp.getOperand(1),
16075                        N->getOperand(4));
16076 
16077   return SDValue();
16078 }
16079 
16080 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
16081                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
16082                                      const TargetLowering &TLI) {
16083   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
16084     if (LD->isIndexed())
16085       return false;
16086     EVT VT = LD->getMemoryVT();
16087     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
16088       return false;
16089     Ptr = LD->getBasePtr();
16090   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
16091     if (ST->isIndexed())
16092       return false;
16093     EVT VT = ST->getMemoryVT();
16094     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
16095       return false;
16096     Ptr = ST->getBasePtr();
16097     IsLoad = false;
16098   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
16099     if (LD->isIndexed())
16100       return false;
16101     EVT VT = LD->getMemoryVT();
16102     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
16103         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
16104       return false;
16105     Ptr = LD->getBasePtr();
16106     IsMasked = true;
16107   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
16108     if (ST->isIndexed())
16109       return false;
16110     EVT VT = ST->getMemoryVT();
16111     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
16112         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
16113       return false;
16114     Ptr = ST->getBasePtr();
16115     IsLoad = false;
16116     IsMasked = true;
16117   } else {
16118     return false;
16119   }
16120   return true;
16121 }
16122 
16123 /// Try turning a load/store into a pre-indexed load/store when the base
16124 /// pointer is an add or subtract and it has other uses besides the load/store.
16125 /// After the transformation, the new indexed load/store has effectively folded
16126 /// the add/subtract in and all of its other uses are redirected to the
16127 /// new load/store.
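/// For example (illustrative; requires target support for pre-indexed forms):
///   y = add x, 4
///   store val, y
///   ... other uses of y ...
/// becomes a single pre-indexed store through x + 4 that also produces the
/// incremented pointer, to which the other uses of y are redirected.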
16128 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
16129   if (Level < AfterLegalizeDAG)
16130     return false;
16131 
16132   bool IsLoad = true;
16133   bool IsMasked = false;
16134   SDValue Ptr;
16135   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
16136                                 Ptr, TLI))
16137     return false;
16138 
16139   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
16140   // out.  There is no reason to make this a preinc/predec.
16141   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
16142       Ptr->hasOneUse())
16143     return false;
16144 
16145   // Ask the target to do addressing mode selection.
16146   SDValue BasePtr;
16147   SDValue Offset;
16148   ISD::MemIndexedMode AM = ISD::UNINDEXED;
16149   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
16150     return false;
16151 
16152   // Backends without true r+i pre-indexed forms may need to pass a
16153   // constant base with a variable offset so that constant coercion
16154   // will work with the patterns in canonical form.
16155   bool Swapped = false;
16156   if (isa<ConstantSDNode>(BasePtr)) {
16157     std::swap(BasePtr, Offset);
16158     Swapped = true;
16159   }
16160 
  // Don't create an indexed load / store with zero offset.
16162   if (isNullConstant(Offset))
16163     return false;
16164 
16165   // Try turning it into a pre-indexed load / store except when:
16166   // 1) The new base ptr is a frame index.
16167   // 2) If N is a store and the new base ptr is either the same as or is a
16168   //    predecessor of the value being stored.
16169   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
16170   //    that would create a cycle.
16171   // 4) All uses are load / store ops that use it as old base ptr.
16172 
16173   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
16174   // (plus the implicit offset) to a register to preinc anyway.
16175   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16176     return false;
16177 
16178   // Check #2.
16179   if (!IsLoad) {
16180     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
16181                            : cast<StoreSDNode>(N)->getValue();
16182 
16183     // Would require a copy.
16184     if (Val == BasePtr)
16185       return false;
16186 
16187     // Would create a cycle.
16188     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
16189       return false;
16190   }
16191 
16192   // Caches for hasPredecessorHelper.
16193   SmallPtrSet<const SDNode *, 32> Visited;
16194   SmallVector<const SDNode *, 16> Worklist;
16195   Worklist.push_back(N);
16196 
16197   // If the offset is a constant, there may be other adds of constants that
16198   // can be folded with this one. We should do this to avoid having to keep
16199   // a copy of the original base pointer.
16200   SmallVector<SDNode *, 16> OtherUses;
16201   if (isa<ConstantSDNode>(Offset))
16202     for (SDNode::use_iterator UI = BasePtr->use_begin(),
16203                               UE = BasePtr->use_end();
16204          UI != UE; ++UI) {
16205       SDUse &Use = UI.getUse();
16206       // Skip the use that is Ptr and uses of other results from BasePtr's
16207       // node (important for nodes that return multiple results).
16208       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
16209         continue;
16210 
16211       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
16212         continue;
16213 
16214       if (Use.getUser()->getOpcode() != ISD::ADD &&
16215           Use.getUser()->getOpcode() != ISD::SUB) {
16216         OtherUses.clear();
16217         break;
16218       }
16219 
16220       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
16221       if (!isa<ConstantSDNode>(Op1)) {
16222         OtherUses.clear();
16223         break;
16224       }
16225 
16226       // FIXME: In some cases, we can be smarter about this.
16227       if (Op1.getValueType() != Offset.getValueType()) {
16228         OtherUses.clear();
16229         break;
16230       }
16231 
16232       OtherUses.push_back(Use.getUser());
16233     }
16234 
16235   if (Swapped)
16236     std::swap(BasePtr, Offset);
16237 
16238   // Now check for #3 and #4.
16239   bool RealUse = false;
16240 
16241   for (SDNode *Use : Ptr->uses()) {
16242     if (Use == N)
16243       continue;
16244     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
16245       return false;
16246 
16247     // If Ptr may be folded in addressing mode of other use, then it's
16248     // not profitable to do this transformation.
16249     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
16250       RealUse = true;
16251   }
16252 
16253   if (!RealUse)
16254     return false;
16255 
16256   SDValue Result;
16257   if (!IsMasked) {
16258     if (IsLoad)
16259       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16260     else
16261       Result =
16262           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
16263   } else {
16264     if (IsLoad)
16265       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16266                                         Offset, AM);
16267     else
16268       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
16269                                          Offset, AM);
16270   }
16271   ++PreIndexedNodes;
16272   ++NodesCombined;
16273   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
16274              Result.dump(&DAG); dbgs() << '\n');
16275   WorklistRemover DeadNodes(*this);
16276   if (IsLoad) {
16277     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16278     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16279   } else {
16280     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16281   }
16282 
16283   // Finally, since the node is now dead, remove it from the graph.
16284   deleteAndRecombine(N);
16285 
16286   if (Swapped)
16287     std::swap(BasePtr, Offset);
16288 
16289   // Replace other uses of BasePtr that can be updated to use Ptr
16290   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
16291     unsigned OffsetIdx = 1;
16292     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
16293       OffsetIdx = 0;
16294     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
16295            BasePtr.getNode() && "Expected BasePtr operand");
16296 
16297     // We need to replace ptr0 in the following expression:
16298     //   x0 * offset0 + y0 * ptr0 = t0
16299     // knowing that
16300     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
16301     //
16302     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
16303     // indexed load/store and the expression that needs to be re-written.
16304     //
16305     // Therefore, we have:
16306     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
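    // Concrete instance: when both expressions are ADDs and AM is PRE_INC
    // (x0 = x1 = y0 = y1 = 1), this simplifies to
    //   t0 = (offset0 - offset1) + t1
    // which is exactly the ADD of CNV and the pointer result built below.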
16307 
16308     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
16309     const APInt &Offset0 = CN->getAPIntValue();
16310     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
16311     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
16312     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
16313     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
16314     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
16315 
16316     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
16317 
16318     APInt CNV = Offset0;
16319     if (X0 < 0) CNV = -CNV;
16320     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
16321     else CNV = CNV - Offset1;
16322 
16323     SDLoc DL(OtherUses[i]);
16324 
16325     // We can now generate the new expression.
16326     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
16327     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
16328 
16329     SDValue NewUse = DAG.getNode(Opcode,
16330                                  DL,
16331                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
16332     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
16333     deleteAndRecombine(OtherUses[i]);
16334   }
16335 
16336   // Replace the uses of Ptr with uses of the updated base value.
16337   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
16338   deleteAndRecombine(Ptr.getNode());
16339   AddToWorklist(Result.getNode());
16340 
16341   return true;
16342 }
16343 
16344 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
16345                                    SDValue &BasePtr, SDValue &Offset,
16346                                    ISD::MemIndexedMode &AM,
16347                                    SelectionDAG &DAG,
16348                                    const TargetLowering &TLI) {
16349   if (PtrUse == N ||
16350       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
16351     return false;
16352 
16353   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
16354     return false;
16355 
  // Don't create an indexed load / store with zero offset.
16357   if (isNullConstant(Offset))
16358     return false;
16359 
16360   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
16361     return false;
16362 
16363   SmallPtrSet<const SDNode *, 32> Visited;
16364   for (SDNode *Use : BasePtr->uses()) {
16365     if (Use == Ptr.getNode())
16366       continue;
16367 
    // Don't combine if there's a later user which could perform the indexing
    // instead.
16369     if (isa<MemSDNode>(Use)) {
16370       bool IsLoad = true;
16371       bool IsMasked = false;
16372       SDValue OtherPtr;
16373       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16374                                    IsMasked, OtherPtr, TLI)) {
16375         SmallVector<const SDNode *, 2> Worklist;
16376         Worklist.push_back(Use);
16377         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
16378           return false;
16379       }
16380     }
16381 
16382     // If all the uses are load / store addresses, then don't do the
16383     // transformation.
16384     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
16385       for (SDNode *UseUse : Use->uses())
16386         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
16387           return false;
16388     }
16389   }
16390   return true;
16391 }
16392 
16393 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
16394                                          bool &IsMasked, SDValue &Ptr,
16395                                          SDValue &BasePtr, SDValue &Offset,
16396                                          ISD::MemIndexedMode &AM,
16397                                          SelectionDAG &DAG,
16398                                          const TargetLowering &TLI) {
16399   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
16400                                 IsMasked, Ptr, TLI) ||
16401       Ptr->hasOneUse())
16402     return nullptr;
16403 
16404   // Try turning it into a post-indexed load / store except when
16405   // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
16407   // 2) Op must be independent of N, i.e. Op is neither a predecessor
  //    nor a successor of N. Otherwise, folding Op would create a cycle.
16410   for (SDNode *Op : Ptr->uses()) {
16411     // Check for #1.
16412     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
16413       continue;
16414 
16415     // Check for #2.
16416     SmallPtrSet<const SDNode *, 32> Visited;
16417     SmallVector<const SDNode *, 8> Worklist;
16418     // Ptr is predecessor to both N and Op.
16419     Visited.insert(Ptr.getNode());
16420     Worklist.push_back(N);
16421     Worklist.push_back(Op);
16422     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
16423         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
16424       return Op;
16425   }
16426   return nullptr;
16427 }
16428 
/// Try to combine a load/store with an add/sub of the base pointer node into
/// a post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of the pointer's
/// other uses are redirected to the new load/store.
16433 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
16434   if (Level < AfterLegalizeDAG)
16435     return false;
16436 
16437   bool IsLoad = true;
16438   bool IsMasked = false;
16439   SDValue Ptr;
16440   SDValue BasePtr;
16441   SDValue Offset;
16442   ISD::MemIndexedMode AM = ISD::UNINDEXED;
16443   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
16444                                          Offset, AM, DAG, TLI);
16445   if (!Op)
16446     return false;
16447 
16448   SDValue Result;
16449   if (!IsMasked)
16450     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
16451                                          Offset, AM)
16452                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
16453                                           BasePtr, Offset, AM);
16454   else
16455     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
16456                                                BasePtr, Offset, AM)
16457                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
16458                                                 BasePtr, Offset, AM);
16459   ++PostIndexedNodes;
16460   ++NodesCombined;
16461   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
16462              Result.dump(&DAG); dbgs() << '\n');
16463   WorklistRemover DeadNodes(*this);
16464   if (IsLoad) {
16465     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
16466     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
16467   } else {
16468     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
16469   }
16470 
16471   // Finally, since the node is now dead, remove it from the graph.
16472   deleteAndRecombine(N);
16473 
16474   // Replace the uses of Use with uses of the updated base value.
16475   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
16476                                 Result.getValue(IsLoad ? 1 : 0));
16477   deleteAndRecombine(Op);
16478   return true;
16479 }
16480 
16481 /// Return the base-pointer arithmetic from an indexed \p LD.
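/// For PRE_INC/POST_INC this is (add BasePtr, Offset); for PRE_DEC/POST_DEC
/// it is (sub BasePtr, Offset), with any TargetConstant offset first
/// rewritten as a regular constant.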
16482 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
16483   ISD::MemIndexedMode AM = LD->getAddressingMode();
16484   assert(AM != ISD::UNINDEXED);
16485   SDValue BP = LD->getOperand(1);
16486   SDValue Inc = LD->getOperand(2);
16487 
16488   // Some backends use TargetConstants for load offsets, but don't expect
16489   // TargetConstants in general ADD nodes. We can convert these constants into
16490   // regular Constants (if the constant is not opaque).
16491   assert((Inc.getOpcode() != ISD::TargetConstant ||
16492           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
16493          "Cannot split out indexing using opaque target constants");
16494   if (Inc.getOpcode() == ISD::TargetConstant) {
16495     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
16496     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
16497                           ConstInc->getValueType(0));
16498   }
16499 
16500   unsigned Opc =
16501       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
16502   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
16503 }
16504 
16505 static inline ElementCount numVectorEltsOrZero(EVT T) {
16506   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
16507 }
16508 
16509 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
16510   Val = ST->getValue();
16511   EVT STType = Val.getValueType();
16512   EVT STMemType = ST->getMemoryVT();
16513   if (STType == STMemType)
16514     return true;
16515   if (isTypeLegal(STMemType))
16516     return false; // fail.
16517   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
16518       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
16519     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
16520     return true;
16521   }
16522   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
16523       STType.isInteger() && STMemType.isInteger()) {
16524     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
16525     return true;
16526   }
16527   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
16528     Val = DAG.getBitcast(STMemType, Val);
16529     return true;
16530   }
16531   return false; // fail.
16532 }
16533 
16534 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
16535   EVT LDMemType = LD->getMemoryVT();
16536   EVT LDType = LD->getValueType(0);
16537   assert(Val.getValueType() == LDMemType &&
16538          "Attempting to extend value of non-matching type");
16539   if (LDType == LDMemType)
16540     return true;
16541   if (LDMemType.isInteger() && LDType.isInteger()) {
16542     switch (LD->getExtensionType()) {
16543     case ISD::NON_EXTLOAD:
16544       Val = DAG.getBitcast(LDType, Val);
16545       return true;
16546     case ISD::EXTLOAD:
16547       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
16548       return true;
16549     case ISD::SEXTLOAD:
16550       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
16551       return true;
16552     case ISD::ZEXTLOAD:
16553       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
16554       return true;
16555     }
16556   }
16557   return false;
16558 }
16559 
16560 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
16561   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
16562     return SDValue();
16563   SDValue Chain = LD->getOperand(0);
16564   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
16565   // TODO: Relax this restriction for unordered atomics (see D66309)
16566   if (!ST || !ST->isSimple())
16567     return SDValue();
16568 
16569   EVT LDType = LD->getValueType(0);
16570   EVT LDMemType = LD->getMemoryVT();
16571   EVT STMemType = ST->getMemoryVT();
16572   EVT STType = ST->getValue().getValueType();
16573 
16574   // There are two cases to consider here:
16575   //  1. The store is fixed width and the load is scalable. In this case we
16576   //     don't know at compile time if the store completely envelops the load
16577   //     so we abandon the optimisation.
16578   //  2. The store is scalable and the load is fixed width. We could
16579   //     potentially support a limited number of cases here, but there has been
16580   //     no cost-benefit analysis to prove it's worth it.
16581   bool LdStScalable = LDMemType.isScalableVector();
16582   if (LdStScalable != STMemType.isScalableVector())
16583     return SDValue();
16584 
16585   // If we are dealing with scalable vectors on a big endian platform the
16586   // calculation of offsets below becomes trickier, since we do not know at
16587   // compile time the absolute size of the vector. Until we've done more
16588   // analysis on big-endian platforms it seems better to bail out for now.
16589   if (LdStScalable && DAG.getDataLayout().isBigEndian())
16590     return SDValue();
16591 
16592   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
16593   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
16594   int64_t Offset;
16595   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
16596     return SDValue();
16597 
16598   // Normalize for Endianness. After this Offset=0 will denote that the least
16599   // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
16601   // n:th least significant byte of the stored value.
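  // For example, with a 4-byte store and a 2-byte load from the same address
  // on a big-endian target, Offset goes from 0 to (4 - 2) - 0 = 2: the load
  // reads the two most significant bytes of the stored value.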
16602   if (DAG.getDataLayout().isBigEndian())
16603     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
16604               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
16605                  8 -
16606              Offset;
16607 
  // Check that the stored value covers all bits that are loaded.
16609   bool STCoversLD;
16610 
16611   TypeSize LdMemSize = LDMemType.getSizeInBits();
16612   TypeSize StMemSize = STMemType.getSizeInBits();
16613   if (LdStScalable)
16614     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
16615   else
16616     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
16617                                    StMemSize.getFixedSize());
16618 
16619   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
16620     if (LD->isIndexed()) {
16621       // Cannot handle opaque target constants and we must respect the user's
16622       // request not to split indexes from loads.
16623       if (!canSplitIdx(LD))
16624         return SDValue();
16625       SDValue Idx = SplitIndexingFromLoad(LD);
16626       SDValue Ops[] = {Val, Idx, Chain};
16627       return CombineTo(LD, Ops, 3);
16628     }
16629     return CombineTo(LD, Val, Chain);
16630   };
16631 
16632   if (!STCoversLD)
16633     return SDValue();
16634 
16635   // Memory as copy space (potentially masked).
16636   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
16637     // Simple case: Direct non-truncating forwarding
16638     if (LDType.getSizeInBits() == LdMemSize)
16639       return ReplaceLd(LD, ST->getValue(), Chain);
16640     // Can we model the truncate and extension with an and mask?
16641     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
16642         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
16643       // Mask to size of LDMemType
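      // e.g. an i16-in-memory value reloaded with a zero/any-extending load
      // into i32 becomes (and i32 StoredValue, 0xFFFF).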
16644       auto Mask =
16645           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
16646                                                StMemSize.getFixedSize()),
16647                           SDLoc(ST), STType);
16648       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
16649       return ReplaceLd(LD, Val, Chain);
16650     }
16651   }
16652 
16653   // TODO: Deal with nonzero offset.
16654   if (LD->getBasePtr().isUndef() || Offset != 0)
16655     return SDValue();
  // Model necessary truncations / extensions.
16657   SDValue Val;
16658   // Truncate Value To Stored Memory Size.
16659   do {
16660     if (!getTruncatedStoreValue(ST, Val))
16661       continue;
16662     if (!isTypeLegal(LDMemType))
16663       continue;
16664     if (STMemType != LDMemType) {
16665       // TODO: Support vectors? This requires extract_subvector/bitcast.
16666       if (!STMemType.isVector() && !LDMemType.isVector() &&
16667           STMemType.isInteger() && LDMemType.isInteger())
16668         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
16669       else
16670         continue;
16671     }
16672     if (!extendLoadedValueToExtension(LD, Val))
16673       continue;
16674     return ReplaceLd(LD, Val, Chain);
16675   } while (false);
16676 
16677   // On failure, cleanup dead nodes we may have created.
16678   if (Val->use_empty())
16679     deleteAndRecombine(Val.getNode());
16680   return SDValue();
16681 }
16682 
16683 SDValue DAGCombiner::visitLOAD(SDNode *N) {
16684   LoadSDNode *LD  = cast<LoadSDNode>(N);
16685   SDValue Chain = LD->getChain();
16686   SDValue Ptr   = LD->getBasePtr();
16687 
16688   // If load is not volatile and there are no uses of the loaded value (and
16689   // the updated indexed value in case of indexed loads), change uses of the
16690   // chain value into uses of the chain input (i.e. delete the dead load).
16691   // TODO: Allow this for unordered atomics (see D66309)
16692   if (LD->isSimple()) {
16693     if (N->getValueType(1) == MVT::Other) {
16694       // Unindexed loads.
16695       if (!N->hasAnyUseOfValue(0)) {
16696         // It's not safe to use the two value CombineTo variant here. e.g.
16697         // v1, chain2 = load chain1, loc
16698         // v2, chain3 = load chain2, loc
16699         // v3         = add v2, c
16700         // Now we replace use of chain2 with chain1.  This makes the second load
16701         // isomorphic to the one we are deleting, and thus makes this load live.
16702         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
16703                    dbgs() << "\nWith chain: "; Chain.dump(&DAG);
16704                    dbgs() << "\n");
16705         WorklistRemover DeadNodes(*this);
16706         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16707         AddUsersToWorklist(Chain.getNode());
16708         if (N->use_empty())
16709           deleteAndRecombine(N);
16710 
16711         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16712       }
16713     } else {
16714       // Indexed loads.
16715       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
16716 
16717       // If this load has an opaque TargetConstant offset, then we cannot split
16718       // the indexing into an add/sub directly (that TargetConstant may not be
16719       // valid for a different type of node, and we cannot convert an opaque
16720       // target constant into a regular constant).
16721       bool CanSplitIdx = canSplitIdx(LD);
16722 
16723       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
16724         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
16725         SDValue Index;
16726         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
16727           Index = SplitIndexingFromLoad(LD);
16728           // Try to fold the base pointer arithmetic into subsequent loads and
16729           // stores.
16730           AddUsersToWorklist(N);
16731         } else
16732           Index = DAG.getUNDEF(N->getValueType(1));
16733         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
16734                    dbgs() << "\nWith: "; Undef.dump(&DAG);
16735                    dbgs() << " and 2 other values\n");
16736         WorklistRemover DeadNodes(*this);
16737         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
16738         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
16739         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
16740         deleteAndRecombine(N);
16741         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
16742       }
16743     }
16744   }
16745 
16746   // If this load is directly stored, replace the load value with the stored
16747   // value.
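  // For example (illustrative):
  //   ch = store chain, x, [p]
  //   y  = load ch, [p]
  // can forward x to all uses of y, subject to the size/type/offset checks
  // performed in ForwardStoreValueToDirectLoad.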
16748   if (auto V = ForwardStoreValueToDirectLoad(LD))
16749     return V;
16750 
16751   // Try to infer better alignment information than the load already has.
16752   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
16753     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
16754       if (*Alignment > LD->getAlign() &&
16755           isAligned(*Alignment, LD->getSrcValueOffset())) {
16756         SDValue NewLoad = DAG.getExtLoad(
16757             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
16758             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
16759             LD->getMemOperand()->getFlags(), LD->getAAInfo());
        // NewLoad will always be N, as we are only refining the alignment.
16761         assert(NewLoad.getNode() == N);
16762         (void)NewLoad;
16763       }
16764     }
16765   }
16766 
16767   if (LD->isUnindexed()) {
16768     // Walk up chain skipping non-aliasing memory nodes.
16769     SDValue BetterChain = FindBetterChain(LD, Chain);
16770 
16771     // If there is a better chain.
16772     if (Chain != BetterChain) {
16773       SDValue ReplLoad;
16774 
      // Replace the chain to avoid the dependency.
16776       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
16777         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
16778                                BetterChain, Ptr, LD->getMemOperand());
16779       } else {
16780         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
16781                                   LD->getValueType(0),
16782                                   BetterChain, Ptr, LD->getMemoryVT(),
16783                                   LD->getMemOperand());
16784       }
16785 
16786       // Create token factor to keep old chain connected.
16787       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
16788                                   MVT::Other, Chain, ReplLoad.getValue(1));
16789 
16790       // Replace uses with load result and token factor
16791       return CombineTo(N, ReplLoad.getValue(0), Token);
16792     }
16793   }
16794 
16795   // Try transforming N to an indexed load.
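  // For example (illustrative), on a target with pre-indexed addressing:
  //   x = load [add p, 4]
  //   q = add p, 4
  // may become a single pre-indexed load producing both x and the updated
  // pointer q.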
16796   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16797     return SDValue(N, 0);
16798 
  // Try to slice N up into smaller, more direct loads if the slices map to
  // different register banks or can be paired.
16801   if (SliceUpLoad(N))
16802     return SDValue(N, 0);
16803 
16804   return SDValue();
16805 }
16806 
16807 namespace {
16808 
16809 /// Helper structure used to slice a load in smaller loads.
16810 /// Basically a slice is obtained from the following sequence:
16811 /// Origin = load Ty1, Base
16812 /// Shift = srl Ty1 Origin, CstTy Amount
16813 /// Inst = trunc Shift to Ty2
16814 ///
16815 /// Then, it will be rewritten into:
16816 /// Slice = load SliceTy, Base + SliceOffset
16817 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
16818 ///
16819 /// SliceTy is deduced from the number of bits that are actually used to
16820 /// build Inst.
16821 struct LoadedSlice {
16822   /// Helper structure used to compute the cost of a slice.
16823   struct Cost {
    /// Are we optimizing for code size?
16825     bool ForCodeSize = false;
16826 
    /// Various cost counters.
16828     unsigned Loads = 0;
16829     unsigned Truncates = 0;
16830     unsigned CrossRegisterBanksCopies = 0;
16831     unsigned ZExts = 0;
16832     unsigned Shift = 0;
16833 
16834     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
16835 
16836     /// Get the cost of one isolated slice.
16837     Cost(const LoadedSlice &LS, bool ForCodeSize)
16838         : ForCodeSize(ForCodeSize), Loads(1) {
16839       EVT TruncType = LS.Inst->getValueType(0);
16840       EVT LoadedType = LS.getLoadedType();
16841       if (TruncType != LoadedType &&
16842           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
16843         ZExts = 1;
16844     }
16845 
    /// Account for the gain from slicing in the current cost.
    /// Slicing provides a few gains, like removing a shift or a truncate.
    /// This method grows the cost of the original load by the gain obtained
    /// from this slice.
16850     void addSliceGain(const LoadedSlice &LS) {
16851       // Each slice saves a truncate.
16852       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
16853       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
16854                               LS.Inst->getValueType(0)))
16855         ++Truncates;
16856       // If there is a shift amount, this slice gets rid of it.
16857       if (LS.Shift)
16858         ++Shift;
16859       // If this slice can merge a cross register bank copy, account for it.
16860       if (LS.canMergeExpensiveCrossRegisterBankCopy())
16861         ++CrossRegisterBanksCopies;
16862     }
16863 
16864     Cost &operator+=(const Cost &RHS) {
16865       Loads += RHS.Loads;
16866       Truncates += RHS.Truncates;
16867       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
16868       ZExts += RHS.ZExts;
16869       Shift += RHS.Shift;
16870       return *this;
16871     }
16872 
16873     bool operator==(const Cost &RHS) const {
16874       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
16875              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
16876              ZExts == RHS.ZExts && Shift == RHS.Shift;
16877     }
16878 
16879     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
16880 
16881     bool operator<(const Cost &RHS) const {
16882       // Assume cross register banks copies are as expensive as loads.
16883       // FIXME: Do we want some more target hooks?
16884       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
16885       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
16886       // Unless we are optimizing for code size, consider the
16887       // expensive operation first.
16888       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
16889         return ExpensiveOpsLHS < ExpensiveOpsRHS;
16890       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
16891              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
16892     }
16893 
16894     bool operator>(const Cost &RHS) const { return RHS < *this; }
16895 
16896     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
16897 
16898     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
16899   };
16900 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
16903   SDNode *Inst;
16904 
16905   // The original load instruction.
16906   LoadSDNode *Origin;
16907 
16908   // The right shift amount in bits from the original load.
16909   unsigned Shift;
16910 
  // The DAG from which Origin comes.
16912   // This is used to get some contextual information about legal types, etc.
16913   SelectionDAG *DAG;
16914 
16915   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
16916               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
16917       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
16918 
  /// Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth wide, with used bits set to 1 and
  ///         unused bits set to 0.
16922   APInt getUsedBits() const {
16923     // Reproduce the trunc(lshr) sequence:
16924     // - Start from the truncated value.
16925     // - Zero extend to the desired bit width.
16926     // - Shift left.
16927     assert(Origin && "No original load to compare against.");
16928     unsigned BitWidth = Origin->getValueSizeInBits(0);
16929     assert(Inst && "This slice is not bound to an instruction");
16930     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
16931            "Extracted slice is bigger than the whole type!");
16932     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
16933     UsedBits.setAllBits();
16934     UsedBits = UsedBits.zext(BitWidth);
16935     UsedBits <<= Shift;
16936     return UsedBits;
16937   }
16938 
16939   /// Get the size of the slice to be loaded in bytes.
16940   unsigned getLoadedSize() const {
16941     unsigned SliceSize = getUsedBits().countPopulation();
16942     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
16943     return SliceSize / 8;
16944   }
16945 
16946   /// Get the type that will be loaded for this slice.
16947   /// Note: This may not be the final type for the slice.
16948   EVT getLoadedType() const {
16949     assert(DAG && "Missing context");
16950     LLVMContext &Ctxt = *DAG->getContext();
16951     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
16952   }
16953 
16954   /// Get the alignment of the load used for this slice.
16955   Align getAlign() const {
16956     Align Alignment = Origin->getAlign();
16957     uint64_t Offset = getOffsetFromBase();
16958     if (Offset != 0)
16959       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
16960     return Alignment;
16961   }
16962 
16963   /// Check if this slice can be rewritten with legal operations.
16964   bool isLegal() const {
16965     // An invalid slice is not legal.
16966     if (!Origin || !Inst || !DAG)
16967       return false;
16968 
    // Offsets are only used by indexed loads; we do not handle those.
16970     if (!Origin->getOffset().isUndef())
16971       return false;
16972 
16973     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16974 
16975     // Check that the type is legal.
16976     EVT SliceType = getLoadedType();
16977     if (!TLI.isTypeLegal(SliceType))
16978       return false;
16979 
16980     // Check that the load is legal for this type.
16981     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
16982       return false;
16983 
16984     // Check that the offset can be computed.
16985     // 1. Check its type.
16986     EVT PtrType = Origin->getBasePtr().getValueType();
16987     if (PtrType == MVT::Untyped || PtrType.isExtended())
16988       return false;
16989 
16990     // 2. Check that it fits in the immediate.
16991     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
16992       return false;
16993 
16994     // 3. Check that the computation is legal.
16995     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
16996       return false;
16997 
16998     // Check that the zext is legal if it needs one.
16999     EVT TruncateType = Inst->getValueType(0);
17000     if (TruncateType != SliceType &&
17001         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
17002       return false;
17003 
17004     return true;
17005   }
17006 
17007   /// Get the offset in bytes of this slice in the original chunk of
17008   /// bits.
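  ///
  /// For example (illustrative): for an 8-byte Origin, Shift == 16, and a
  /// 2-byte slice, the offset is 2 on a little-endian target and
  /// 8 - 2 - 2 == 4 on a big-endian target.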
17009   /// \pre DAG != nullptr.
17010   uint64_t getOffsetFromBase() const {
17011     assert(DAG && "Missing context.");
17012     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
17013     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
17014     uint64_t Offset = Shift / 8;
17015     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
17016     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
17017            "The size of the original loaded type is not a multiple of a"
17018            " byte.");
    // If Offset were bigger than TySizeInBytes, we would be loading all
    // zeros; that should have been optimized away earlier.
17021     assert(TySizeInBytes > Offset &&
17022            "Invalid shift amount for given loaded size");
17023     if (IsBigEndian)
17024       Offset = TySizeInBytes - Offset - getLoadedSize();
17025     return Offset;
17026   }
17027 
17028   /// Generate the sequence of instructions to load the slice
17029   /// represented by this object and redirect the uses of this slice to
17030   /// this new sequence of instructions.
17031   /// \pre this->Inst && this->Origin are valid Instructions and this
17032   /// object passed the legal check: LoadedSlice::isLegal returned true.
17033   /// \return The last instruction of the sequence used to load the slice.
17034   SDValue loadSlice() const {
17035     assert(Inst && Origin && "Unable to replace a non-existing slice.");
17036     const SDValue &OldBaseAddr = Origin->getBasePtr();
17037     SDValue BaseAddr = OldBaseAddr;
17038     // Get the offset in that chunk of bytes w.r.t. the endianness.
17039     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
17040     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
17041     if (Offset) {
17042       // BaseAddr = BaseAddr + Offset.
17043       EVT ArithType = BaseAddr.getValueType();
17044       SDLoc DL(Origin);
17045       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
17046                               DAG->getConstant(Offset, DL, ArithType));
17047     }
17048 
17049     // Create the type of the loaded slice according to its size.
17050     EVT SliceType = getLoadedType();
17051 
17052     // Create the load for the slice.
17053     SDValue LastInst =
17054         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
17055                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
17056                      Origin->getMemOperand()->getFlags());
17057     // If the final type is not the same as the loaded type, this means that
17058     // we have to pad with zero. Create a zero extend for that.
17059     EVT FinalType = Inst->getValueType(0);
17060     if (SliceType != FinalType)
17061       LastInst =
17062           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
17063     return LastInst;
17064   }
17065 
17066   /// Check if this slice can be merged with an expensive cross register
17067   /// bank copy. E.g.,
17068   /// i = load i32
17069   /// f = bitcast i32 i to float
17070   bool canMergeExpensiveCrossRegisterBankCopy() const {
17071     if (!Inst || !Inst->hasOneUse())
17072       return false;
17073     SDNode *Use = *Inst->use_begin();
17074     if (Use->getOpcode() != ISD::BITCAST)
17075       return false;
17076     assert(DAG && "Missing context");
17077     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
17078     EVT ResVT = Use->getValueType(0);
17079     const TargetRegisterClass *ResRC =
17080         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
17081     const TargetRegisterClass *ArgRC =
17082         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
17083                            Use->getOperand(0)->isDivergent());
17084     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
17085       return false;
17086 
17087     // At this point, we know that we perform a cross-register-bank copy.
17088     // Check if it is expensive.
17089     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless the two register classes do not
    // explicitly share a common subclass.
17092     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
17093       return false;
17094 
17095     // Check if it will be merged with the load.
17096     // 1. Check the alignment / fast memory access constraint.
17097     bool IsFast = false;
17098     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
17099                                 Origin->getAddressSpace(), getAlign(),
17100                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
17101         !IsFast)
17102       return false;
17103 
17104     // 2. Check that the load is a legal operation for that type.
17105     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
17106       return false;
17107 
17108     // 3. Check that we do not have a zext in the way.
17109     if (Inst->getValueType(0) != getLoadedType())
17110       return false;
17111 
17112     return true;
17113   }
17114 };
17115 
17116 } // end anonymous namespace
17117 
17118 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
17119 /// \p UsedBits looks like 0..0 1..1 0..0.
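///
/// For example (illustrative): 0x00FF00 is dense, while 0xFF00FF is not.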
17120 static bool areUsedBitsDense(const APInt &UsedBits) {
17121   // If all the bits are one, this is dense!
17122   if (UsedBits.isAllOnes())
17123     return true;
17124 
17125   // Get rid of the unused bits on the right.
17126   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
17127   // Get rid of the unused bits on the left.
17128   if (NarrowedUsedBits.countLeadingZeros())
17129     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
17130   // Check that the chunk of bits is completely used.
17131   return NarrowedUsedBits.isAllOnes();
17132 }
17133 
17134 /// Check whether or not \p First and \p Second are next to each other
17135 /// in memory. This means that there is no hole between the bits loaded
17136 /// by \p First and the bits loaded by \p Second.
17137 static bool areSlicesNextToEachOther(const LoadedSlice &First,
17138                                      const LoadedSlice &Second) {
17139   assert(First.Origin == Second.Origin && First.Origin &&
17140          "Unable to match different memory origins.");
17141   APInt UsedBits = First.getUsedBits();
17142   assert((UsedBits & Second.getUsedBits()) == 0 &&
17143          "Slices are not supposed to overlap.");
17144   UsedBits |= Second.getUsedBits();
17145   return areUsedBitsDense(UsedBits);
17146 }
17147 
/// Adjust \p GlobalLSCost according to the target's pairing capabilities and
/// the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as there are
/// slices in \p LoadedSlices.
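///
/// For example (illustrative): on a target that advertises paired loads via
/// TLI.hasPairedLoad, two adjacent same-type slices are counted as a single
/// load in \p GlobalLSCost.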
17152 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17153                                  LoadedSlice::Cost &GlobalLSCost) {
17154   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
17156   if (NumberOfSlices < 2)
17157     return;
17158 
17159   // Sort the slices so that elements that are likely to be next to each
17160   // other in memory are next to each other in the list.
17161   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
17162     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
17163     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
17164   });
17165   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // for placement in a paired load.
17168   const LoadedSlice *First = nullptr;
17169   const LoadedSlice *Second = nullptr;
17170   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
17171                 // Set the beginning of the pair.
17172                                                            First = Second) {
17173     Second = &LoadedSlices[CurrSlice];
17174 
17175     // If First is NULL, it means we start a new pair.
17176     // Get to the next slice.
17177     if (!First)
17178       continue;
17179 
17180     EVT LoadedType = First->getLoadedType();
17181 
17182     // If the types of the slices are different, we cannot pair them.
17183     if (LoadedType != Second->getLoadedType())
17184       continue;
17185 
17186     // Check if the target supplies paired loads for this type.
17187     Align RequiredAlignment;
17188     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
17190       Second = nullptr;
17191       continue;
17192     }
17193     // Check if we meet the alignment requirement.
17194     if (First->getAlign() < RequiredAlignment)
17195       continue;
17196 
17197     // Check that both loads are next to each other in memory.
17198     if (!areSlicesNextToEachOther(*First, *Second))
17199       continue;
17200 
17201     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
17202     --GlobalLSCost.Loads;
17203     // Move to the next pair.
17204     Second = nullptr;
17205   }
17206 }
17207 
/// Check the profitability of all involved LoadedSlices.
/// Currently, slicing is considered profitable if there are exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose combined cost (\see LoadedSlice::Cost) is smaller than that of the
/// original load (3).
17212 ///
17213 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
17214 /// the elements themselves.
17215 ///
/// FIXME: When the cost model is mature enough, we can relax constraints (1)
/// and (2).
17218 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
17219                                 const APInt &UsedBits, bool ForCodeSize) {
17220   unsigned NumberOfSlices = LoadedSlices.size();
17221   if (StressLoadSlicing)
17222     return NumberOfSlices > 1;
17223 
17224   // Check (1).
17225   if (NumberOfSlices != 2)
17226     return false;
17227 
17228   // Check (2).
17229   if (!areUsedBitsDense(UsedBits))
17230     return false;
17231 
17232   // Check (3).
17233   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
17234   // The original code has one big load.
17235   OrigCost.Loads = 1;
17236   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
17237     const LoadedSlice &LS = LoadedSlices[CurrSlice];
17238     // Accumulate the cost of all the slices.
17239     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
17240     GlobalSlicingCost += SliceCost;
17241 
17242     // Account as cost in the original configuration the gain obtained
17243     // with the current slices.
17244     OrigCost.addSliceGain(LS);
17245   }
17246 
17247   // If the target supports paired load, adjust the cost accordingly.
17248   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
17249   return OrigCost > GlobalSlicingCost;
17250 }
17251 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
17253 /// operations, split it in the various pieces being extracted.
17254 ///
17255 /// This sort of thing is introduced by SROA.
17256 /// This slicing takes care not to insert overlapping loads.
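///
/// For example (illustrative): an i32 load used only as (trunc x to i16) and
/// (trunc (srl x, 16) to i16) can be split into two independent i16 loads at
/// byte offsets 0 and 2 on a little-endian target.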
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
17258 bool DAGCombiner::SliceUpLoad(SDNode *N) {
17259   if (Level < AfterLegalizeDAG)
17260     return false;
17261 
17262   LoadSDNode *LD = cast<LoadSDNode>(N);
17263   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
17264       !LD->getValueType(0).isInteger())
17265     return false;
17266 
17267   // The algorithm to split up a load of a scalable vector into individual
17268   // elements currently requires knowing the length of the loaded type,
17269   // so will need adjusting to work on scalable vectors.
17270   if (LD->getValueType(0).isScalableVector())
17271     return false;
17272 
17273   // Keep track of already used bits to detect overlapping values.
17274   // In that case, we will just abort the transformation.
17275   APInt UsedBits(LD->getValueSizeInBits(0), 0);
17276 
17277   SmallVector<LoadedSlice, 4> LoadedSlices;
17278 
17279   // Check if this load is used as several smaller chunks of bits.
17280   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
17281   // of computation for each trunc.
17282   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
17283        UI != UIEnd; ++UI) {
17284     // Skip the uses of the chain.
17285     if (UI.getUse().getResNo() != 0)
17286       continue;
17287 
17288     SDNode *User = *UI;
17289     unsigned Shift = 0;
17290 
17291     // Check if this is a trunc(lshr).
17292     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
17293         isa<ConstantSDNode>(User->getOperand(1))) {
17294       Shift = User->getConstantOperandVal(1);
17295       User = *User->use_begin();
17296     }
17297 
    // At this point, User must be a truncate for the slice to be valid: we
    // only handle trunc and trunc(lshr) uses.
17300     if (User->getOpcode() != ISD::TRUNCATE)
17301       return false;
17302 
    // The width of the type must be a power of 2 and at least 8 bits;
    // otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8, the
    // slice would straddle byte boundaries. We do not support that.
17307     unsigned Width = User->getValueSizeInBits(0);
17308     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
17309       return false;
17310 
17311     // Build the slice for this chain of computations.
17312     LoadedSlice LS(User, LD, Shift, &DAG);
17313     APInt CurrentUsedBits = LS.getUsedBits();
17314 
17315     // Check if this slice overlaps with another.
17316     if ((CurrentUsedBits & UsedBits) != 0)
17317       return false;
17318     // Update the bits used globally.
17319     UsedBits |= CurrentUsedBits;
17320 
17321     // Check if the new slice would be legal.
17322     if (!LS.isLegal())
17323       return false;
17324 
17325     // Record the slice.
17326     LoadedSlices.push_back(LS);
17327   }
17328 
17329   // Abort slicing if it does not seem to be profitable.
17330   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
17331     return false;
17332 
17333   ++SlicedLoads;
17334 
17335   // Rewrite each chain to use an independent load.
17336   // By construction, each chain can be represented by a unique load.
17337 
17338   // Prepare the argument for the new token factor for all the slices.
17339   SmallVector<SDValue, 8> ArgChains;
17340   for (const LoadedSlice &LS : LoadedSlices) {
17341     SDValue SliceInst = LS.loadSlice();
17342     CombineTo(LS.Inst, SliceInst, true);
17343     if (SliceInst.getOpcode() != ISD::LOAD)
17344       SliceInst = SliceInst.getOperand(0);
17345     assert(SliceInst->getOpcode() == ISD::LOAD &&
17346            "It takes more than a zext to get to the loaded slice!!");
17347     ArgChains.push_back(SliceInst.getValue(1));
17348   }
17349 
17350   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
17351                               ArgChains);
17352   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
17353   AddToWorklist(Chain.getNode());
17354   return true;
17355 }
17356 
/// Check to see if V is (and (load ptr), imm), where the load has specific
/// bytes cleared out. If so, return the number of bytes being masked out and
/// the byte shift amount.
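///
/// For example (illustrative): with V == (and (load i32 [ptr]), 0xFFFF0000),
/// the low two bytes are being cleared, so the result is {2, 0}
/// (two masked bytes, zero byte shift).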
17360 static std::pair<unsigned, unsigned>
17361 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
17362   std::pair<unsigned, unsigned> Result(0, 0);
17363 
17364   // Check for the structure we're looking for.
17365   if (V->getOpcode() != ISD::AND ||
17366       !isa<ConstantSDNode>(V->getOperand(1)) ||
17367       !ISD::isNormalLoad(V->getOperand(0).getNode()))
17368     return Result;
17369 
17370   // Check the chain and pointer.
17371   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
17372   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
17373 
17374   // This only handles simple types.
17375   if (V.getValueType() != MVT::i16 &&
17376       V.getValueType() != MVT::i32 &&
17377       V.getValueType() != MVT::i64)
17378     return Result;
17379 
17380   // Check the constant mask.  Invert it so that the bits being masked out are
17381   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
17382   // follow the sign bit for uniformity.
17383   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
17384   unsigned NotMaskLZ = countLeadingZeros(NotMask);
17385   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
17386   unsigned NotMaskTZ = countTrailingZeros(NotMask);
17387   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
17388   if (NotMaskLZ == 64) return Result;  // All zero mask.
17389 
  // See if we have a contiguous run of bits. If so, NotMask matches 0*1+0*.
17391   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
17392     return Result;
17393 
17394   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
17395   if (V.getValueType() != MVT::i64 && NotMaskLZ)
17396     NotMaskLZ -= 64-V.getValueSizeInBits();
17397 
17398   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
17399   switch (MaskedBytes) {
17400   case 1:
17401   case 2:
17402   case 4: break;
  default: return Result; // All-ones mask, or an unsupported (e.g. 3- or 5-byte) mask.
17404   }
17405 
  // Verify that the masked-out region starts at a byte offset that is a
  // multiple of the access width, so the narrowed access stays aligned.
17408   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
17409 
  // For narrowing to be valid, the load must be the memory operation that
  // immediately precedes the store.
17412   if (LD == Chain.getNode())
17413     ; // ok.
17414   else if (Chain->getOpcode() == ISD::TokenFactor &&
17415            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
17417     if (!LD->isOperandOf(Chain.getNode()))
17418       return Result;
17419   } else
17420     return Result; // Fail.
17421 
17422   Result.first = MaskedBytes;
17423   Result.second = NotMaskTZ/8;
17424   return Result;
17425 }
17426 
17427 /// Check to see if IVal is something that provides a value as specified by
17428 /// MaskInfo. If so, replace the specified store with a narrower store of
17429 /// truncated IVal.
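///
/// For example (illustrative): a store of
///   (or (and (load p), 0xFFFF0000), IVal)
/// where IVal is known to be zero outside its low 16 bits becomes an i16
/// store of (trunc IVal) at byte offset 0 on a little-endian target.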
17430 static SDValue
17431 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
17432                                 SDValue IVal, StoreSDNode *St,
17433                                 DAGCombiner *DC) {
17434   unsigned NumBytes = MaskInfo.first;
17435   unsigned ByteShift = MaskInfo.second;
17436   SelectionDAG &DAG = DC->getDAG();
17437 
17438   // Check to see if IVal is all zeros in the part being masked in by the 'or'
17439   // that uses this.  If not, this is not a replacement.
17440   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
17441                                   ByteShift*8, (ByteShift+NumBytes)*8);
17442   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
17443 
17444   // Check that it is legal on the target to do this.  It is legal if the new
17445   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
17446   // legalization. If the source type is legal, but the store type isn't, see
17447   // if we can use a truncating store.
17448   MVT VT = MVT::getIntegerVT(NumBytes * 8);
17449   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17450   bool UseTruncStore;
17451   if (DC->isTypeLegal(VT))
17452     UseTruncStore = false;
17453   else if (TLI.isTypeLegal(IVal.getValueType()) &&
17454            TLI.isTruncStoreLegal(IVal.getValueType(), VT))
17455     UseTruncStore = true;
17456   else
17457     return SDValue();
17458   // Check that the target doesn't think this is a bad idea.
17459   if (St->getMemOperand() &&
17460       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
17461                               *St->getMemOperand()))
17462     return SDValue();
17463 
17464   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
17465   // shifted by ByteShift and truncated down to NumBytes.
17466   if (ByteShift) {
17467     SDLoc DL(IVal);
17468     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
17469                        DAG.getConstant(ByteShift*8, DL,
17470                                     DC->getShiftAmountTy(IVal.getValueType())));
17471   }
17472 
17473   // Figure out the offset for the store and the alignment of the access.
17474   unsigned StOffset;
17475   if (DAG.getDataLayout().isLittleEndian())
17476     StOffset = ByteShift;
17477   else
17478     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
17479 
17480   SDValue Ptr = St->getBasePtr();
17481   if (StOffset) {
17482     SDLoc DL(IVal);
17483     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
17484   }
17485 
17486   ++OpsNarrowed;
17487   if (UseTruncStore)
17488     return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
17489                              St->getPointerInfo().getWithOffset(StOffset),
17490                              VT, St->getOriginalAlign());
17491 
17492   // Truncate down to the new size.
17493   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
17494 
17495   return DAG
17496       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
17497                 St->getPointerInfo().getWithOffset(StOffset),
17498                 St->getOriginalAlign());
17499 }
17500 
17501 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
17502 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
17503 /// narrowing the load and store if it would end up being a win for performance
17504 /// or code size.
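///
/// For example (illustrative): "store (or (load i32 [p]), 0xFF00), [p]" can
/// be narrowed to "store (or (load i8 [p+1]), 0xFF), [p+1]" on a
/// little-endian target.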
17505 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
17506   StoreSDNode *ST  = cast<StoreSDNode>(N);
17507   if (!ST->isSimple())
17508     return SDValue();
17509 
17510   SDValue Chain = ST->getChain();
17511   SDValue Value = ST->getValue();
17512   SDValue Ptr   = ST->getBasePtr();
17513   EVT VT = Value.getValueType();
17514 
17515   if (ST->isTruncatingStore() || VT.isVector())
17516     return SDValue();
17517 
17518   unsigned Opc = Value.getOpcode();
17519 
17520   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
17521       !Value.hasOneUse())
17522     return SDValue();
17523 
17524   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
17525   // is a byte mask indicating a consecutive number of bytes, check to see if
17526   // Y is known to provide just those bytes.  If so, we try to replace the
17527   // load + replace + store sequence with a single (narrower) store, which makes
17528   // the load dead.
17529   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
17530     std::pair<unsigned, unsigned> MaskedLoad;
17531     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
17532     if (MaskedLoad.first)
17533       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17534                                                   Value.getOperand(1), ST,this))
17535         return NewST;
17536 
17537     // Or is commutative, so try swapping X and Y.
17538     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
17539     if (MaskedLoad.first)
17540       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
17541                                                   Value.getOperand(0), ST,this))
17542         return NewST;
17543   }
17544 
17545   if (!EnableReduceLoadOpStoreWidth)
17546     return SDValue();
17547 
17548   if (Value.getOperand(1).getOpcode() != ISD::Constant)
17549     return SDValue();
17550 
17551   SDValue N0 = Value.getOperand(0);
17552   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17553       Chain == SDValue(N0.getNode(), 1)) {
17554     LoadSDNode *LD = cast<LoadSDNode>(N0);
17555     if (LD->getBasePtr() != Ptr ||
17556         LD->getPointerInfo().getAddrSpace() !=
17557         ST->getPointerInfo().getAddrSpace())
17558       return SDValue();
17559 
    // Find the type to narrow the load / op / store to.
17561     SDValue N1 = Value.getOperand(1);
17562     unsigned BitWidth = N1.getValueSizeInBits();
17563     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
17564     if (Opc == ISD::AND)
17565       Imm ^= APInt::getAllOnes(BitWidth);
17566     if (Imm == 0 || Imm.isAllOnes())
17567       return SDValue();
17568     unsigned ShAmt = Imm.countTrailingZeros();
17569     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
17570     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
17571     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17572     // The narrowing should be profitable, the load/store operation should be
17573     // legal (or custom) and the store size should be equal to the NewVT width.
17574     while (NewBW < BitWidth &&
17575            (NewVT.getStoreSizeInBits() != NewBW ||
17576             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
17577             !TLI.isNarrowingProfitable(VT, NewVT))) {
17578       NewBW = NextPowerOf2(NewBW);
17579       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
17580     }
17581     if (NewBW >= BitWidth)
17582       return SDValue();
17583 
    // If the lowest changed bit does not start at a NewBW-bit boundary,
    // start at the previous boundary.
17586     if (ShAmt % NewBW)
17587       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
17588     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
17589                                    std::min(BitWidth, ShAmt + NewBW));
17590     if ((Imm & Mask) == Imm) {
17591       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
17592       if (Opc == ISD::AND)
17593         NewImm ^= APInt::getAllOnes(NewBW);
17594       uint64_t PtrOff = ShAmt / 8;
17595       // For big endian targets, we need to adjust the offset to the pointer to
17596       // load the correct bytes.
17597       if (DAG.getDataLayout().isBigEndian())
17598         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
17599 
17600       bool IsFast = false;
17601       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
17602       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
17603                                   LD->getAddressSpace(), NewAlign,
17604                                   LD->getMemOperand()->getFlags(), &IsFast) ||
17605           !IsFast)
17606         return SDValue();
17607 
17608       SDValue NewPtr =
17609           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
17610       SDValue NewLD =
17611           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
17612                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
17613                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
17614       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
17615                                    DAG.getConstant(NewImm, SDLoc(Value),
17616                                                    NewVT));
17617       SDValue NewST =
17618           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
17619                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
17620 
17621       AddToWorklist(NewPtr.getNode());
17622       AddToWorklist(NewLD.getNode());
17623       AddToWorklist(NewVal.getNode());
17624       WorklistRemover DeadNodes(*this);
17625       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
17626       ++OpsNarrowed;
17627       return NewST;
17628     }
17629   }
17630 
17631   return SDValue();
17632 }
17633 
17634 /// For a given floating point load / store pair, if the load value isn't used
17635 /// by any other operations, then consider transforming the pair to integer
17636 /// load / store operations if the target deems the transformation profitable.
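///
/// For example (illustrative): "store (load f64 [p]), [q]" can become
/// "store (load i64 [p]), [q]" when the target reports the integer forms as
/// both legal and desirable, avoiding a round trip through FP registers.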
17637 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
17638   StoreSDNode *ST  = cast<StoreSDNode>(N);
17639   SDValue Value = ST->getValue();
17640   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
17641       Value.hasOneUse()) {
17642     LoadSDNode *LD = cast<LoadSDNode>(Value);
17643     EVT VT = LD->getMemoryVT();
17644     if (!VT.isFloatingPoint() ||
17645         VT != ST->getMemoryVT() ||
17646         LD->isNonTemporal() ||
17647         ST->isNonTemporal() ||
17648         LD->getPointerInfo().getAddrSpace() != 0 ||
17649         ST->getPointerInfo().getAddrSpace() != 0)
17650       return SDValue();
17651 
17652     TypeSize VTSize = VT.getSizeInBits();
17653 
17654     // We don't know the size of scalable types at compile time so we cannot
17655     // create an integer of the equivalent size.
17656     if (VTSize.isScalable())
17657       return SDValue();
17658 
17659     bool FastLD = false, FastST = false;
17660     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
17661     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
17662         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
17663         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
17664         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
17665         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17666                                 *LD->getMemOperand(), &FastLD) ||
17667         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
17668                                 *ST->getMemOperand(), &FastST) ||
17669         !FastLD || !FastST)
17670       return SDValue();
17671 
17672     SDValue NewLD =
17673         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
17674                     LD->getPointerInfo(), LD->getAlign());
17675 
17676     SDValue NewST =
17677         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
17678                      ST->getPointerInfo(), ST->getAlign());
17679 
17680     AddToWorklist(NewLD.getNode());
17681     AddToWorklist(NewST.getNode());
17682     WorklistRemover DeadNodes(*this);
17683     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
17684     ++LdStFP2Int;
17685     return NewST;
17686   }
17687 
17688   return SDValue();
17689 }
17690 
17691 // This is a helper function for visitMUL to check the profitability
17692 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
17693 // MulNode is the original multiply, AddNode is (add x, c1),
17694 // and ConstNode is c2.
17695 //
17696 // If the (add x, c1) has multiple uses, we could increase
17697 // the number of adds if we make this transformation.
17698 // It would only be worth doing this if we can remove a
17699 // multiply in the process. Check for that here.
17700 // To illustrate:
17701 //     (A + c1) * c3
17702 //     (A + c2) * c3
17703 // We're checking for cases where we have common "c3 * A" expressions.
17704 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
17705                                               SDValue ConstNode) {
17706   APInt Val;
17707 
17708   // If the add only has one use, and the target thinks the folding is
17709   // profitable or does not lead to worse code, this would be OK to do.
17710   if (AddNode->hasOneUse() &&
17711       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
17712     return true;
17713 
17714   // Walk all the users of the constant with which we're multiplying.
17715   for (SDNode *Use : ConstNode->uses()) {
17716     if (Use == MulNode) // This use is the one we're on right now. Skip it.
17717       continue;
17718 
17719     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
17720       SDNode *OtherOp;
17721       SDNode *MulVar = AddNode.getOperand(0).getNode();
17722 
17723       // OtherOp is what we're multiplying against the constant.
17724       if (Use->getOperand(0) == ConstNode)
17725         OtherOp = Use->getOperand(1).getNode();
17726       else
17727         OtherOp = Use->getOperand(0).getNode();
17728 
17729       // Check to see if multiply is with the same operand of our "add".
17730       //
17731       //     ConstNode  = CONST
17732       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
17733       //     ...
17734       //     AddNode  = (A + c1)  <-- MulVar is A.
17735       //         = AddNode * ConstNode   <-- current visiting instruction.
17736       //
17737       // If we make this transformation, we will have a common
17738       // multiply (ConstNode * A) that we can save.
17739       if (OtherOp == MulVar)
17740         return true;
17741 
17742       // Now check to see if a future expansion will give us a common
17743       // multiply.
17744       //
17745       //     ConstNode  = CONST
17746       //     AddNode    = (A + c1)
17747       //     ...   = AddNode * ConstNode <-- current visiting instruction.
17748       //     ...
17749       //     OtherOp = (A + c2)
17750       //     Use     = OtherOp * ConstNode <-- visiting Use.
17751       //
17752       // If we make this transformation, we will have a common
17753       // multiply (CONST * A) after we also do the same transformation
17754       // to the "t2" instruction.
17755       if (OtherOp->getOpcode() == ISD::ADD &&
17756           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
17757           OtherOp->getOperand(0).getNode() == MulVar)
17758         return true;
17759     }
17760   }
17761 
17762   // Didn't find a case where this would be profitable.
17763   return false;
17764 }
17765 
17766 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
17767                                          unsigned NumStores) {
17768   SmallVector<SDValue, 8> Chains;
17769   SmallPtrSet<const SDNode *, 8> Visited;
17770   SDLoc StoreDL(StoreNodes[0].MemNode);
17771 
17772   for (unsigned i = 0; i < NumStores; ++i) {
17773     Visited.insert(StoreNodes[i].MemNode);
17774   }
17775 
  // Don't include nodes that are children or repeated nodes.
17777   for (unsigned i = 0; i < NumStores; ++i) {
17778     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
17779       Chains.push_back(StoreNodes[i].MemNode->getChain());
17780   }
17781 
  assert(!Chains.empty() && "Merged stores should have at least one chain");
17783   return DAG.getTokenFactor(StoreDL, Chains);
17784 }
17785 
17786 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
17787     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
17788     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
17789   // Make sure we have something to merge.
17790   if (NumStores < 2)
17791     return false;
17792 
17793   assert((!UseTrunc || !UseVector) &&
17794          "This optimization cannot emit a vector truncating store");
17795 
  // Use the location of the first store for the merged node.
17797   SDLoc DL(StoreNodes[0].MemNode);
17798 
17799   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
17800   unsigned SizeInBits = NumStores * ElementSizeBits;
17801   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17802 
17803   Optional<MachineMemOperand::Flags> Flags;
17804   AAMDNodes AAInfo;
17805   for (unsigned I = 0; I != NumStores; ++I) {
17806     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17807     if (!Flags) {
17808       Flags = St->getMemOperand()->getFlags();
17809       AAInfo = St->getAAInfo();
17810       continue;
17811     }
17812     // Skip merging if there's an inconsistent flag.
17813     if (Flags != St->getMemOperand()->getFlags())
17814       return false;
17815     // Concatenate AA metadata.
17816     AAInfo = AAInfo.concat(St->getAAInfo());
17817   }
17818 
17819   EVT StoreTy;
17820   if (UseVector) {
17821     unsigned Elts = NumStores * NumMemElts;
17822     // Get the type for the merged vector store.
17823     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17824   } else
17825     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
17826 
17827   SDValue StoredVal;
17828   if (UseVector) {
17829     if (IsConstantSrc) {
17830       SmallVector<SDValue, 8> BuildVector;
17831       for (unsigned I = 0; I != NumStores; ++I) {
17832         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
17833         SDValue Val = St->getValue();
17834         // If constant is of the wrong type, convert it now.
17835         if (MemVT != Val.getValueType()) {
17836           Val = peekThroughBitcasts(Val);
17837           // Deal with constants of wrong size.
17838           if (ElementSizeBits != Val.getValueSizeInBits()) {
17839             EVT IntMemVT =
17840                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
17841             if (isa<ConstantFPSDNode>(Val)) {
17842               // Not clear how to truncate FP values.
17843               return false;
17844             }
17845 
17846             if (auto *C = dyn_cast<ConstantSDNode>(Val))
17847               Val = DAG.getConstant(C->getAPIntValue()
17848                                         .zextOrTrunc(Val.getValueSizeInBits())
17849                                         .zextOrTrunc(ElementSizeBits),
17850                                     SDLoc(C), IntMemVT);
17851           }
          // Bitcast the (now correctly sized) value to the memory type.
17853           Val = DAG.getBitcast(MemVT, Val);
17854         }
17855         BuildVector.push_back(Val);
17856       }
17857       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17858                                                : ISD::BUILD_VECTOR,
17859                               DL, StoreTy, BuildVector);
17860     } else {
17861       SmallVector<SDValue, 8> Ops;
17862       for (unsigned i = 0; i < NumStores; ++i) {
17863         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17864         SDValue Val = peekThroughBitcasts(St->getValue());
17865         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
17866         // type MemVT. If the underlying value is not the correct
17867         // type, but it is an extraction of an appropriate vector we
17868         // can recast Val to be of the correct type. This may require
17869         // converting between EXTRACT_VECTOR_ELT and
17870         // EXTRACT_SUBVECTOR.
17871         if ((MemVT != Val.getValueType()) &&
17872             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
17873              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
17874           EVT MemVTScalarTy = MemVT.getScalarType();
17875           // We may need to add a bitcast here to get types to line up.
17876           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
17877             Val = DAG.getBitcast(MemVT, Val);
17878           } else {
17879             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
17880                                             : ISD::EXTRACT_VECTOR_ELT;
17881             SDValue Vec = Val.getOperand(0);
17882             SDValue Idx = Val.getOperand(1);
17883             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
17884           }
17885         }
17886         Ops.push_back(Val);
17887       }
17888 
17889       // Build the extracted vector elements back into a vector.
17890       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
17891                                                : ISD::BUILD_VECTOR,
17892                               DL, StoreTy, Ops);
17893     }
17894   } else {
17895     // We should always use a vector store when merging extracted vector
17896     // elements, so this path implies a store of constants.
17897     assert(IsConstantSrc && "Merged vector elements should use vector store");
17898 
17899     APInt StoreInt(SizeInBits, 0);
17900 
17901     // Construct a single integer constant which is made of the smaller
17902     // constant inputs.
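    // For example (illustrative): merging two i16 stores of 0x1234 and
    // 0x5678 (in address order) on a little-endian target produces the
    // single i32 constant 0x56781234.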
17903     bool IsLE = DAG.getDataLayout().isLittleEndian();
17904     for (unsigned i = 0; i < NumStores; ++i) {
17905       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
17906       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
17907 
17908       SDValue Val = St->getValue();
17909       Val = peekThroughBitcasts(Val);
17910       StoreInt <<= ElementSizeBits;
17911       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
17912         StoreInt |= C->getAPIntValue()
17913                         .zextOrTrunc(ElementSizeBits)
17914                         .zextOrTrunc(SizeInBits);
17915       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
17916         StoreInt |= C->getValueAPF()
17917                         .bitcastToAPInt()
17918                         .zextOrTrunc(ElementSizeBits)
17919                         .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary, give up for now.
17921         if (MemVT.getSizeInBits() != ElementSizeBits)
17922           return false;
17923       } else {
17924         llvm_unreachable("Invalid constant element type");
17925       }
17926     }
17927 
17928     // Create the new Load and Store operations.
17929     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
17930   }
17931 
17932   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17933   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
17934 
  // Make sure we use a truncating store if that's necessary for legality.
17936   SDValue NewStore;
17937   if (!UseTrunc) {
17938     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
17939                             FirstInChain->getPointerInfo(),
17940                             FirstInChain->getAlign(), *Flags, AAInfo);
17941   } else { // Must be realized as a trunc store
17942     EVT LegalizedStoredValTy =
17943         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
17944     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
17945     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
17946     SDValue ExtendedStoreVal =
17947         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
17948                         LegalizedStoredValTy);
17949     NewStore = DAG.getTruncStore(
17950         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
17951         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
17952         FirstInChain->getAlign(), *Flags, AAInfo);
17953   }
17954 
17955   // Replace all merged stores with the new store.
17956   for (unsigned i = 0; i < NumStores; ++i)
17957     CombineTo(StoreNodes[i].MemNode, NewStore);
17958 
17959   AddToWorklist(NewChain.getNode());
17960   return true;
17961 }
17962 
17963 void DAGCombiner::getStoreMergeCandidates(
17964     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
17965     SDNode *&RootNode) {
17966   // This holds the base pointer, index, and the offset in bytes from the base
17967   // pointer. We must have a base and an offset. Do not handle stores to undef
17968   // base pointers.
17969   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
17970   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
17971     return;
17972 
17973   SDValue Val = peekThroughBitcasts(St->getValue());
17974   StoreSource StoreSrc = getStoreSource(Val);
17975   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
17976 
17977   // Match on loadbaseptr if relevant.
17978   EVT MemVT = St->getMemoryVT();
17979   BaseIndexOffset LBasePtr;
17980   EVT LoadVT;
17981   if (StoreSrc == StoreSource::Load) {
17982     auto *Ld = cast<LoadSDNode>(Val);
17983     LBasePtr = BaseIndexOffset::match(Ld, DAG);
17984     LoadVT = Ld->getMemoryVT();
17985     // Load and store should be the same type.
17986     if (MemVT != LoadVT)
17987       return;
17988     // Loads must only have one use.
17989     if (!Ld->hasNUsesOfValue(1, 0))
17990       return;
17991     // The memory operands must not be volatile/indexed/atomic.
17992     // TODO: May be able to relax for unordered atomics (see D66309)
17993     if (!Ld->isSimple() || Ld->isIndexed())
17994       return;
17995   }
17996   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
17997                             int64_t &Offset) -> bool {
17998     // The memory operands must not be volatile/indexed/atomic.
17999     // TODO: May be able to relax for unordered atomics (see D66309)
18000     if (!Other->isSimple() || Other->isIndexed())
18001       return false;
18002     // Don't mix temporal stores with non-temporal stores.
18003     if (St->isNonTemporal() != Other->isNonTemporal())
18004       return false;
18005     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
18006     // Allow merging constants of different types as integers.
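    // E.g. when the first store's memory type is i16, a candidate f16
    // constant store may merge with it, since both occupy 16 bits.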
18007     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
18008                                            : Other->getMemoryVT() != MemVT;
18009     switch (StoreSrc) {
18010     case StoreSource::Load: {
18011       if (NoTypeMatch)
18012         return false;
18013       // The Load's Base Ptr must also match.
18014       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
18015       if (!OtherLd)
18016         return false;
18017       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
18018       if (LoadVT != OtherLd->getMemoryVT())
18019         return false;
18020       // Loads must only have one use.
18021       if (!OtherLd->hasNUsesOfValue(1, 0))
18022         return false;
18023       // The memory operands must not be volatile/indexed/atomic.
18024       // TODO: May be able to relax for unordered atomics (see D66309)
18025       if (!OtherLd->isSimple() || OtherLd->isIndexed())
18026         return false;
18027       // Don't mix temporal loads with non-temporal loads.
18028       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
18029         return false;
18030       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
18031         return false;
18032       break;
18033     }
18034     case StoreSource::Constant:
18035       if (NoTypeMatch)
18036         return false;
18037       if (!isIntOrFPConstant(OtherBC))
18038         return false;
18039       break;
18040     case StoreSource::Extract:
18041       // Do not merge truncated stores here.
18042       if (Other->isTruncatingStore())
18043         return false;
18044       if (!MemVT.bitsEq(OtherBC.getValueType()))
18045         return false;
18046       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
18047           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18048         return false;
18049       break;
18050     default:
18051       llvm_unreachable("Unhandled store source for merging");
18052     }
18053     Ptr = BaseIndexOffset::match(Other, DAG);
18054     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
18055   };
18056 
  // Check whether this StoreNode and RootNode pair has already bailed out of
  // the dependence check more times than the limit allows.
18059   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
18060                                         SDNode *RootNode) -> bool {
18061     auto RootCount = StoreRootCountMap.find(StoreNode);
18062     return RootCount != StoreRootCountMap.end() &&
18063            RootCount->second.first == RootNode &&
18064            RootCount->second.second > StoreMergeDependenceLimit;
18065   };
18066 
18067   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
18068     // This must be a chain use.
18069     if (UseIter.getOperandNo() != 0)
18070       return;
18071     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
18072       BaseIndexOffset Ptr;
18073       int64_t PtrDiff;
18074       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
18075           !OverLimitInDependenceCheck(OtherStore, RootNode))
18076         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
18077     }
18078   };
18079 
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
18086   //
18087   // Root
18088   // |-------|-------|
18089   // Load    Load    Store3
18090   // |       |
18091   // Store1   Store2
18092   //
18093   // FIXME: We should be able to climb and
18094   // descend TokenFactors to find candidates as well.
18095 
18096   RootNode = St->getChain().getNode();
18097 
18098   unsigned NumNodesExplored = 0;
18099   const unsigned MaxSearchNodes = 1024;
18100   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
18101     RootNode = Ldn->getChain().getNode();
18102     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
18103          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
18104       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
18105         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
18106           TryToAddCandidate(I2);
18107       }
18108       // Check stores that depend on the root (e.g. Store 3 in the chart above).
18109       if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
18110         TryToAddCandidate(I);
18111       }
18112     }
18113   } else {
18114     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
18115          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
18116       TryToAddCandidate(I);
18117   }
18118 }
18119 
18120 // We need to check that merging these stores does not cause a loop in the
18121 // DAG. Any store candidate may depend on another candidate indirectly through
18122 // its operands. Check in parallel by searching up from operands of candidates.
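// For example, if one candidate's stored value is computed from a load whose
// chain depends on a second candidate, the single merged store would replace
// both: it would be a predecessor of that load (standing in for the second
// store) and also its successor (through the stored value), forming a cycle.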
18123 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
18124     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
18125     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
18130 
18131   SmallPtrSet<const SDNode *, 32> Visited;
18132   SmallVector<const SDNode *, 8> Worklist;
18133 
  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards the size check.
18137 
18138   Worklist.push_back(RootNode);
18139   while (!Worklist.empty()) {
18140     auto N = Worklist.pop_back_val();
18141     if (!Visited.insert(N).second)
18142       continue; // Already present in Visited.
18143     if (N->getOpcode() == ISD::TokenFactor) {
18144       for (SDValue Op : N->ops())
18145         Worklist.push_back(Op.getNode());
18146     }
18147   }
18148 
18149   // Don't count pruning nodes towards max.
18150   unsigned int Max = 1024 + Visited.size();
18151   // Search Ops of store candidates.
18152   for (unsigned i = 0; i < NumStores; ++i) {
18153     SDNode *N = StoreNodes[i].MemNode;
18154     // Of the 4 Store Operands:
18155     //   * Chain (Op 0) -> We have already considered these
18156     //                     in candidate selection, but only by following the
18157     //                     chain dependencies. We could still have a chain
18158     //                     dependency to a load, that has a non-chain dep to
18159     //                     another load, that depends on a store, etc. So it is
18160     //                     possible to have dependencies that consist of a mix
18161     //                     of chain and non-chain deps, and we need to include
    //                     chain operands in the analysis here.
18163     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
18164     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
18166     //                       cycles possible (e.g. via indexed store).
18167     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
18168     //               non-indexed stores). Not constant on all targets (e.g. ARM)
18169     //               and so can participate in a cycle.
18170     for (unsigned j = 0; j < N->getNumOperands(); ++j)
18171       Worklist.push_back(N->getOperand(j).getNode());
18172   }
18173   // Search through DAG. We can stop early if we find a store node.
18174   for (unsigned i = 0; i < NumStores; ++i)
18175     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
18176                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair many times over a limit,
      // we won't add the StoreNode into the StoreNodes set again.
18180       if (Visited.size() >= Max) {
18181         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
18182         if (RootCount.first == RootNode)
18183           RootCount.second++;
18184         else
18185           RootCount = {RootNode, 1};
18186       }
18187       return false;
18188     }
18189   return true;
18190 }
18191 
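// Returns the number of consecutive stores (spaced exactly ElementSizeBytes
// apart) found at the front of the sorted StoreNodes list, dropping leading
// candidates that overlap or leave gaps; returns 0 if no run of at least two
// stores exists. E.g. with ElementSizeBytes = 4 and offsets {0, 0, 4, 8, 20},
// the duplicate leading entry is dropped and 3 is returned for the run
// {0, 4, 8}.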
18192 unsigned
18193 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
18194                                   int64_t ElementSizeBytes) const {
18195   while (true) {
18196     // Find a store past the width of the first store.
18197     size_t StartIdx = 0;
18198     while ((StartIdx + 1 < StoreNodes.size()) &&
18199            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
18200               StoreNodes[StartIdx + 1].OffsetFromBase)
18201       ++StartIdx;
18202 
18203     // Bail if we don't have enough candidates to merge.
18204     if (StartIdx + 1 >= StoreNodes.size())
18205       return 0;
18206 
18207     // Trim stores that overlapped with the first store.
18208     if (StartIdx)
18209       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
18210 
18211     // Scan the memory operations on the chain and find the first
18212     // non-consecutive store memory address.
18213     unsigned NumConsecutiveStores = 1;
18214     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
18215     // Check that the addresses are consecutive starting from the second
18216     // element in the list of stores.
18217     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
18218       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
18219       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18220         break;
18221       NumConsecutiveStores = i + 1;
18222     }
18223     if (NumConsecutiveStores > 1)
18224       return NumConsecutiveStores;
18225 
18226     // There are no consecutive stores at the start of the list.
18227     // Remove the first store and try again.
18228     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
18229   }
18230 }
18231 
18232 bool DAGCombiner::tryStoreMergeOfConstants(
18233     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18234     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
18235   LLVMContext &Context = *DAG.getContext();
18236   const DataLayout &DL = DAG.getDataLayout();
18237   int64_t ElementSizeBytes = MemVT.getStoreSize();
18238   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18239   bool MadeChange = false;
18240 
18241   // Store the constants into memory as one consecutive store.
18242   while (NumConsecutiveStores >= 2) {
18243     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18244     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18245     Align FirstStoreAlign = FirstInChain->getAlign();
18246     unsigned LastLegalType = 1;
18247     unsigned LastLegalVectorType = 1;
18248     bool LastIntegerTrunc = false;
18249     bool NonZero = false;
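    // Index of the first zero-valued store that follows a non-zero one. The
    // skip logic below never skips past it, since a merge starting at that
    // zero may succeed where one including the earlier non-zero values did
    // not (all-zero runs are always cheap to store as vectors).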
18250     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
18251     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18252       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
18253       SDValue StoredVal = ST->getValue();
18254       bool IsElementZero = false;
18255       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
18256         IsElementZero = C->isZero();
18257       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
18258         IsElementZero = C->getConstantFPValue()->isNullValue();
18259       if (IsElementZero) {
18260         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
18261           FirstZeroAfterNonZero = i;
18262       }
18263       NonZero |= !IsElementZero;
18264 
18265       // Find a legal type for the constant store.
18266       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18267       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18268       bool IsFast = false;
18269 
18270       // Break early when size is too large to be legal.
18271       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18272         break;
18273 
18274       if (TLI.isTypeLegal(StoreTy) &&
18275           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18276                                DAG.getMachineFunction()) &&
18277           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18278                                  *FirstInChain->getMemOperand(), &IsFast) &&
18279           IsFast) {
18280         LastIntegerTrunc = false;
18281         LastLegalType = i + 1;
18282         // Or check whether a truncstore is legal.
18283       } else if (TLI.getTypeAction(Context, StoreTy) ==
18284                  TargetLowering::TypePromoteInteger) {
18285         EVT LegalizedStoredValTy =
18286             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
18287         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18288             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18289                                  DAG.getMachineFunction()) &&
18290             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18291                                    *FirstInChain->getMemOperand(), &IsFast) &&
18292             IsFast) {
18293           LastIntegerTrunc = true;
18294           LastLegalType = i + 1;
18295         }
18296       }
18297 
      // We only use vectors if the constant is known to be zero or the
      // target allows it, and the function is not marked with the
      // noimplicitfloat attribute.
18301       if ((!NonZero ||
18302            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
18303           AllowVectors) {
18304         // Find a legal type for the vector store.
18305         unsigned Elts = (i + 1) * NumMemElts;
18306         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18307         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
18308             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18309             TLI.allowsMemoryAccess(Context, DL, Ty,
18310                                    *FirstInChain->getMemOperand(), &IsFast) &&
18311             IsFast)
18312           LastLegalVectorType = i + 1;
18313       }
18314     }
18315 
18316     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
18317     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
18318     bool UseTrunc = LastIntegerTrunc && !UseVector;
18319 
18320     // Check if we found a legal integer type that creates a meaningful
18321     // merge.
18322     if (NumElem < 2) {
18323       // We know that candidate stores are in order and of correct
18324       // shape. While there is no mergeable sequence from the
18325       // beginning one may start later in the sequence. The only
18326       // reason a merge of size N could have failed where another of
18327       // the same size would not have, is if the alignment has
18328       // improved or we've dropped a non-zero value. Drop as many
18329       // candidates as we can here.
18330       unsigned NumSkip = 1;
18331       while ((NumSkip < NumConsecutiveStores) &&
18332              (NumSkip < FirstZeroAfterNonZero) &&
18333              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18334         NumSkip++;
18335 
18336       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18337       NumConsecutiveStores -= NumSkip;
18338       continue;
18339     }
18340 
18341     // Check that we can merge these candidates without causing a cycle.
18342     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18343                                                   RootNode)) {
18344       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18345       NumConsecutiveStores -= NumElem;
18346       continue;
18347     }
18348 
18349     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
18350                                                   /*IsConstantSrc*/ true,
18351                                                   UseVector, UseTrunc);
18352 
18353     // Remove merged stores for next iteration.
18354     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18355     NumConsecutiveStores -= NumElem;
18356   }
18357   return MadeChange;
18358 }
18359 
18360 bool DAGCombiner::tryStoreMergeOfExtracts(
18361     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
18362     EVT MemVT, SDNode *RootNode) {
18363   LLVMContext &Context = *DAG.getContext();
18364   const DataLayout &DL = DAG.getDataLayout();
18365   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18366   bool MadeChange = false;
18367 
  // Loop over the consecutive stores while merging succeeds.
18369   while (NumConsecutiveStores >= 2) {
18370     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18371     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18372     Align FirstStoreAlign = FirstInChain->getAlign();
18373     unsigned NumStoresToMerge = 1;
18374     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18375       // Find a legal type for the vector store.
18376       unsigned Elts = (i + 1) * NumMemElts;
18377       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
18378       bool IsFast = false;
18379 
18380       // Break early when size is too large to be legal.
18381       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
18382         break;
18383 
18384       if (TLI.isTypeLegal(Ty) &&
18385           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
18386           TLI.allowsMemoryAccess(Context, DL, Ty,
18387                                  *FirstInChain->getMemOperand(), &IsFast) &&
18388           IsFast)
18389         NumStoresToMerge = i + 1;
18390     }
18391 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
18394     if (NumStoresToMerge < 2) {
18395       // We know that candidate stores are in order and of correct
18396       // shape. While there is no mergeable sequence from the
18397       // beginning one may start later in the sequence. The only
18398       // reason a merge of size N could have failed where another of
18399       // the same size would not have, is if the alignment has
18400       // improved. Drop as many candidates as we can here.
18401       unsigned NumSkip = 1;
18402       while ((NumSkip < NumConsecutiveStores) &&
18403              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18404         NumSkip++;
18405 
18406       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18407       NumConsecutiveStores -= NumSkip;
18408       continue;
18409     }
18410 
18411     // Check that we can merge these candidates without causing a cycle.
18412     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
18413                                                   RootNode)) {
18414       StoreNodes.erase(StoreNodes.begin(),
18415                        StoreNodes.begin() + NumStoresToMerge);
18416       NumConsecutiveStores -= NumStoresToMerge;
18417       continue;
18418     }
18419 
18420     MadeChange |= mergeStoresOfConstantsOrVecElts(
18421         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
18422         /*UseVector*/ true, /*UseTrunc*/ false);
18423 
18424     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
18425     NumConsecutiveStores -= NumStoresToMerge;
18426   }
18427   return MadeChange;
18428 }
18429 
18430 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
18431                                        unsigned NumConsecutiveStores, EVT MemVT,
18432                                        SDNode *RootNode, bool AllowVectors,
18433                                        bool IsNonTemporalStore,
18434                                        bool IsNonTemporalLoad) {
18435   LLVMContext &Context = *DAG.getContext();
18436   const DataLayout &DL = DAG.getDataLayout();
18437   int64_t ElementSizeBytes = MemVT.getStoreSize();
18438   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
18439   bool MadeChange = false;
18440 
18441   // Look for load nodes which are used by the stored values.
18442   SmallVector<MemOpLink, 8> LoadNodes;
18443 
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, or indexed, and they must be consecutive.
18446   BaseIndexOffset LdBasePtr;
18447 
18448   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
18449     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
18450     SDValue Val = peekThroughBitcasts(St->getValue());
18451     LoadSDNode *Ld = cast<LoadSDNode>(Val);
18452 
18453     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    int64_t LdOffset = 0;
    if (LdBasePtr.getBase().getNode()) {
      // This is not the first pointer we check; its base must match the base
      // recorded from the first load.
      if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
        break;
    } else {
      // Remember the first base pointer; all later loads must share it.
      LdBasePtr = LdPtr;
    }
18464 
18465     // We found a potential memory operand to merge.
18466     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
18467   }
18468 
18469   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
18470     Align RequiredAlignment;
18471     bool NeedRotate = false;
18472     if (LoadNodes.size() == 2) {
18473       // If we have load/store pair instructions and we only have two values,
18474       // don't bother merging.
18475       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
18476           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
18477         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
18478         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
18479         break;
18480       }
18481       // If the loads are reversed, see if we can rotate the halves into place.
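      // E.g. if the stores write (load p+n) then (load p), a single 2*n-byte
      // load from p followed by a rotate of half the bit width swaps the two
      // halves and reproduces the bytes the pair of stores would have
      // written, so the pair can still merge.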
18482       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
18483       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
18484       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
18485       if (Offset0 - Offset1 == ElementSizeBytes &&
18486           (hasOperation(ISD::ROTL, PairVT) ||
18487            hasOperation(ISD::ROTR, PairVT))) {
18488         std::swap(LoadNodes[0], LoadNodes[1]);
18489         NeedRotate = true;
18490       }
18491     }
18492     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
18493     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
18494     Align FirstStoreAlign = FirstInChain->getAlign();
18495     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
18496 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. This variable holds the index of
    // the last consecutive load in the LoadNodes array.
18500 
18501     unsigned LastConsecutiveLoad = 1;
18502 
    // These variables refer to a size (count), not an index in the array.
18504     unsigned LastLegalVectorType = 1;
18505     unsigned LastLegalIntegerType = 1;
18506     bool isDereferenceable = true;
18507     bool DoIntegerTruncate = false;
18508     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
18509     SDValue LoadChain = FirstLoad->getChain();
18510     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
18511       // All loads must share the same chain.
18512       if (LoadNodes[i].MemNode->getChain() != LoadChain)
18513         break;
18514 
18515       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
18516       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
18517         break;
18518       LastConsecutiveLoad = i;
18519 
18520       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
18521         isDereferenceable = false;
18522 
18523       // Find a legal type for the vector store.
18524       unsigned Elts = (i + 1) * NumMemElts;
18525       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18526 
18527       // Break early when size is too large to be legal.
18528       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
18529         break;
18530 
18531       bool IsFastSt = false;
18532       bool IsFastLd = false;
18533       // Don't try vector types if we need a rotate. We may still fail the
18534       // legality checks for the integer type, but we can't handle the rotate
18535       // case with vectors.
18536       // FIXME: We could use a shuffle in place of the rotate.
18537       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
18538           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18539                                DAG.getMachineFunction()) &&
18540           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18541                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18542           IsFastSt &&
18543           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18544                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18545           IsFastLd) {
18546         LastLegalVectorType = i + 1;
18547       }
18548 
18549       // Find a legal type for the integer store.
18550       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
18551       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
18552       if (TLI.isTypeLegal(StoreTy) &&
18553           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
18554                                DAG.getMachineFunction()) &&
18555           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18556                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
18557           IsFastSt &&
18558           TLI.allowsMemoryAccess(Context, DL, StoreTy,
18559                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
18560           IsFastLd) {
18561         LastLegalIntegerType = i + 1;
18562         DoIntegerTruncate = false;
18563         // Or check whether a truncstore and extload is legal.
18564       } else if (TLI.getTypeAction(Context, StoreTy) ==
18565                  TargetLowering::TypePromoteInteger) {
18566         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
18567         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
18568             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
18569                                  DAG.getMachineFunction()) &&
18570             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18571             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
18572             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
18573             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18574                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
18575             IsFastSt &&
18576             TLI.allowsMemoryAccess(Context, DL, StoreTy,
18577                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
18578             IsFastLd) {
18579           LastLegalIntegerType = i + 1;
18580           DoIntegerTruncate = true;
18581         }
18582       }
18583     }
18584 
18585     // Only use vector types if the vector type is larger than the integer
18586     // type. If they are the same, use integers.
18587     bool UseVectorTy =
18588         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
18589     unsigned LastLegalType =
18590         std::max(LastLegalVectorType, LastLegalIntegerType);
18591 
    // We add +1 here because LastConsecutiveLoad is an index into the array,
    // while NumElem refers to a count of elements.
18594     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
18595     NumElem = std::min(LastLegalType, NumElem);
18596     Align FirstLoadAlign = FirstLoad->getAlign();
18597 
18598     if (NumElem < 2) {
18599       // We know that candidate stores are in order and of correct
18600       // shape. While there is no mergeable sequence from the
18601       // beginning one may start later in the sequence. The only
18602       // reason a merge of size N could have failed where another of
18603       // the same size would not have is if the alignment or either
18604       // the load or store has improved. Drop as many candidates as we
18605       // can here.
18606       unsigned NumSkip = 1;
18607       while ((NumSkip < LoadNodes.size()) &&
18608              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
18609              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
18610         NumSkip++;
18611       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
18612       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
18613       NumConsecutiveStores -= NumSkip;
18614       continue;
18615     }
18616 
18617     // Check that we can merge these candidates without causing a cycle.
18618     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
18619                                                   RootNode)) {
18620       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18621       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18622       NumConsecutiveStores -= NumElem;
18623       continue;
18624     }
18625 
18626     // Find if it is better to use vectors or integers to load and store
18627     // to memory.
18628     EVT JointMemOpVT;
18629     if (UseVectorTy) {
18630       // Find a legal type for the vector store.
18631       unsigned Elts = NumElem * NumMemElts;
18632       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
18633     } else {
18634       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
18635       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
18636     }
18637 
18638     SDLoc LoadDL(LoadNodes[0].MemNode);
18639     SDLoc StoreDL(StoreNodes[0].MemNode);
18640 
18641     // The merged loads are required to have the same incoming chain, so
18642     // using the first's chain is acceptable.
18643 
18644     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
18645     AddToWorklist(NewStoreChain.getNode());
18646 
18647     MachineMemOperand::Flags LdMMOFlags =
18648         isDereferenceable ? MachineMemOperand::MODereferenceable
18649                           : MachineMemOperand::MONone;
18650     if (IsNonTemporalLoad)
18651       LdMMOFlags |= MachineMemOperand::MONonTemporal;
18652 
18653     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
18654                                               ? MachineMemOperand::MONonTemporal
18655                                               : MachineMemOperand::MONone;
18656 
18657     SDValue NewLoad, NewStore;
18658     if (UseVectorTy || !DoIntegerTruncate) {
18659       NewLoad = DAG.getLoad(
18660           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
18661           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
18662       SDValue StoreOp = NewLoad;
18663       if (NeedRotate) {
18664         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
18665         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
18666                "Unexpected type for rotate-able load pair");
18667         SDValue RotAmt =
18668             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
18669         // Target can convert to the identical ROTR if it does not have ROTL.
18670         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
18671       }
18672       NewStore = DAG.getStore(
18673           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
18674           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
18675     } else { // This must be the truncstore/extload case
18676       EVT ExtendedTy =
18677           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
18678       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
18679                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
18680                                FirstLoad->getPointerInfo(), JointMemOpVT,
18681                                FirstLoadAlign, LdMMOFlags);
18682       NewStore = DAG.getTruncStore(
18683           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
18684           FirstInChain->getPointerInfo(), JointMemOpVT,
18685           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
18686     }
18687 
18688     // Transfer chain users from old loads to the new load.
18689     for (unsigned i = 0; i < NumElem; ++i) {
18690       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
18691       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
18692                                     SDValue(NewLoad.getNode(), 1));
18693     }
18694 
18695     // Replace all stores with the new store. Recursively remove corresponding
18696     // values if they are no longer used.
18697     for (unsigned i = 0; i < NumElem; ++i) {
18698       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
18699       CombineTo(StoreNodes[i].MemNode, NewStore);
18700       if (Val->use_empty())
18701         recursivelyDeleteUnusedNodes(Val.getNode());
18702     }
18703 
18704     MadeChange = true;
18705     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
18706     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
18707     NumConsecutiveStores -= NumElem;
18708   }
18709   return MadeChange;
18710 }
18711 
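// Top-level driver for store merging: collect candidate stores that share a
// base pointer with St, sort them by offset from that base, and repeatedly
// hand maximal consecutive runs to the source-specific helpers above.
// Returns true if any merge was performed.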
18712 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
18713   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
18714     return false;
18715 
18716   // TODO: Extend this function to merge stores of scalable vectors.
18717   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
18718   // store since we know <vscale x 16 x i8> is exactly twice as large as
18719   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
18720   EVT MemVT = St->getMemoryVT();
18721   if (MemVT.isScalableVector())
18722     return false;
18723   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
18724     return false;
18725 
18726   // This function cannot currently deal with non-byte-sized memory sizes.
18727   int64_t ElementSizeBytes = MemVT.getStoreSize();
18728   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
18729     return false;
18730 
18731   // Do not bother looking at stored values that are not constants, loads, or
18732   // extracted vector elements.
18733   SDValue StoredVal = peekThroughBitcasts(St->getValue());
18734   const StoreSource StoreSrc = getStoreSource(StoredVal);
18735   if (StoreSrc == StoreSource::Unknown)
18736     return false;
18737 
18738   SmallVector<MemOpLink, 8> StoreNodes;
18739   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
18741   getStoreMergeCandidates(St, StoreNodes, RootNode);
18742 
18743   // Check if there is anything to merge.
18744   if (StoreNodes.size() < 2)
18745     return false;
18746 
18747   // Sort the memory operands according to their distance from the
18748   // base pointer.
18749   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
18750     return LHS.OffsetFromBase < RHS.OffsetFromBase;
18751   });
18752 
18753   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
18754       Attribute::NoImplicitFloat);
18755   bool IsNonTemporalStore = St->isNonTemporal();
18756   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
18757                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
18758 
  // Store merging attempts to merge the lowest stores first. This generally
  // works out, since the remaining stores are checked after the first
  // collection of stores is merged. However, in the case that a
  // non-mergeable store is found first, e.g., {p[-2], p[0], p[1], p[2],
  // p[3]}, we would fail and miss the subsequent mergeable cases. To prevent
  // this, we prune such stores from the front of StoreNodes here.
18766   bool MadeChange = false;
18767   while (StoreNodes.size() > 1) {
18768     unsigned NumConsecutiveStores =
18769         getConsecutiveStores(StoreNodes, ElementSizeBytes);
18770     // There are no more stores in the list to examine.
18771     if (NumConsecutiveStores == 0)
18772       return MadeChange;
18773 
18774     // We have at least 2 consecutive stores. Try to merge them.
18775     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
18776     switch (StoreSrc) {
18777     case StoreSource::Constant:
18778       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
18779                                              MemVT, RootNode, AllowVectors);
18780       break;
18781 
18782     case StoreSource::Extract:
18783       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
18784                                             MemVT, RootNode);
18785       break;
18786 
18787     case StoreSource::Load:
18788       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
18789                                          MemVT, RootNode, AllowVectors,
18790                                          IsNonTemporalStore, IsNonTemporalLoad);
18791       break;
18792 
18793     default:
18794       llvm_unreachable("Unhandled store source type");
18795     }
18796   }
18797   return MadeChange;
18798 }
18799 
18800 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
18801   SDLoc SL(ST);
18802   SDValue ReplStore;
18803 
18804   // Replace the chain to avoid dependency.
18805   if (ST->isTruncatingStore()) {
18806     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
18807                                   ST->getBasePtr(), ST->getMemoryVT(),
18808                                   ST->getMemOperand());
18809   } else {
18810     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
18811                              ST->getMemOperand());
18812   }
18813 
18814   // Create token to keep both nodes around.
18815   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
18816                               MVT::Other, ST->getChain(), ReplStore);
18817 
18818   // Make sure the new and old chains are cleaned up.
18819   AddToWorklist(Token.getNode());
18820 
18821   // Don't add users to work list.
18822   return CombineTo(ST, Token, false);
18823 }
18824 
18825 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
18826   SDValue Value = ST->getValue();
18827   if (Value.getOpcode() == ISD::TargetConstantFP)
18828     return SDValue();
18829 
18830   if (!ISD::isNormalStore(ST))
18831     return SDValue();
18832 
18833   SDLoc DL(ST);
18834 
18835   SDValue Chain = ST->getChain();
18836   SDValue Ptr = ST->getBasePtr();
18837 
18838   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
18839 
18840   // NOTE: If the original store is volatile, this transform must not increase
18841   // the number of stores.  For example, on x86-32 an f64 can be stored in one
18842   // processor operation but an i64 (which is not legal) requires two.  So the
18843   // transform should not be done in this case.
18844 
18845   SDValue Tmp;
18846   switch (CFP->getSimpleValueType(0).SimpleTy) {
18847   default:
18848     llvm_unreachable("Unknown FP type");
18849   case MVT::f16:    // We don't do this for these yet.
18850   case MVT::bf16:
18851   case MVT::f80:
18852   case MVT::f128:
18853   case MVT::ppcf128:
18854     return SDValue();
18855   case MVT::f32:
18856     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
18857         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18858       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
18859                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
18860                             MVT::i32);
18861       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
18862     }
18863 
18864     return SDValue();
18865   case MVT::f64:
18866     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
18867          ST->isSimple()) ||
18868         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
18869       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
18870                             getZExtValue(), SDLoc(CFP), MVT::i64);
18871       return DAG.getStore(Chain, DL, Tmp,
18872                           Ptr, ST->getMemOperand());
18873     }
18874 
18875     if (ST->isSimple() &&
18876         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
18877       // Many FP stores are not made apparent until after legalize, e.g. for
18878       // argument passing.  Since this is so common, custom legalize the
18879       // 64-bit integer store into two 32-bit stores.
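      // E.g. storing f64 1.0 (bits 0x3FF0000000000000) becomes a store of
      // i32 0 at Ptr and i32 0x3FF00000 at Ptr+4 on a little-endian target.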
18880       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
18881       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
18882       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
18883       if (DAG.getDataLayout().isBigEndian())
18884         std::swap(Lo, Hi);
18885 
18886       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18887       AAMDNodes AAInfo = ST->getAAInfo();
18888 
18889       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18890                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18891       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
18892       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
18893                                  ST->getPointerInfo().getWithOffset(4),
18894                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
18895       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
18896                          St0, St1);
18897     }
18898 
18899     return SDValue();
18900   }
18901 }
18902 
18903 SDValue DAGCombiner::visitSTORE(SDNode *N) {
18904   StoreSDNode *ST  = cast<StoreSDNode>(N);
18905   SDValue Chain = ST->getChain();
18906   SDValue Value = ST->getValue();
18907   SDValue Ptr   = ST->getBasePtr();
18908 
18909   // If this is a store of a bit convert, store the input value if the
18910   // resultant store does not need a higher alignment than the original.
18911   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
18912       ST->isUnindexed()) {
18913     EVT SVT = Value.getOperand(0).getValueType();
18914     // If the store is volatile, we only want to change the store type if the
18915     // resulting store is legal. Otherwise we might increase the number of
18916     // memory accesses. We don't care if the original type was legal or not
18917     // as we assume software couldn't rely on the number of accesses of an
18918     // illegal type.
18919     // TODO: May be able to relax for unordered atomics (see D66309)
18920     if (((!LegalOperations && ST->isSimple()) ||
18921          TLI.isOperationLegal(ISD::STORE, SVT)) &&
18922         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
18923                                      DAG, *ST->getMemOperand())) {
18924       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18925                           ST->getMemOperand());
18926     }
18927   }
18928 
18929   // Turn 'store undef, Ptr' -> nothing.
18930   if (Value.isUndef() && ST->isUnindexed())
18931     return Chain;
18932 
18933   // Try to infer better alignment information than the store already has.
18934   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
18935     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18936       if (*Alignment > ST->getAlign() &&
18937           isAligned(*Alignment, ST->getSrcValueOffset())) {
18938         SDValue NewStore =
18939             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
18940                               ST->getMemoryVT(), *Alignment,
18941                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
18943         assert(NewStore.getNode() == N);
18944         (void)NewStore;
18945       }
18946     }
18947   }
18948 
  // Try transforming a pair of floating point load / store ops to integer
  // load / store ops.
18951   if (SDValue NewST = TransformFPLoadStorePair(N))
18952     return NewST;
18953 
18954   // Try transforming several stores into STORE (BSWAP).
18955   if (SDValue Store = mergeTruncStores(ST))
18956     return Store;
18957 
18958   if (ST->isUnindexed()) {
18959     // Walk up chain skipping non-aliasing memory nodes, on this store and any
18960     // adjacent stores.
18961     if (findBetterNeighborChains(ST)) {
18962       // replaceStoreChain uses CombineTo, which handled all of the worklist
18963       // manipulation. Return the original node to not do anything else.
18964       return SDValue(ST, 0);
18965     }
18966     Chain = ST->getChain();
18967   }
18968 
18969   // FIXME: is there such a thing as a truncating indexed store?
18970   if (ST->isTruncatingStore() && ST->isUnindexed() &&
18971       Value.getValueType().isInteger() &&
18972       (!isa<ConstantSDNode>(Value) ||
18973        !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Convert a truncating store of an extension into a standard store.
18975     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
18976          Value.getOpcode() == ISD::SIGN_EXTEND ||
18977          Value.getOpcode() == ISD::ANY_EXTEND) &&
18978         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
18979         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
18980       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
18981                           ST->getMemOperand());
18982 
18983     APInt TruncDemandedBits =
18984         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
18985                              ST->getMemoryVT().getScalarSizeInBits());
18986 
18987     // See if we can simplify the input to this truncstore with knowledge that
18988     // only the low bits are being used.  For example:
18989     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
18990     AddToWorklist(Value.getNode());
18991     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
18992       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
18993                                ST->getMemOperand());
18994 
18995     // Otherwise, see if we can simplify the operation with
18996     // SimplifyDemandedBits, which only works if the value has a single use.
18997     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will add
      // Value's node back to the worklist if necessary, but we also need to
      // re-visit the Store node itself.
19002       if (N->getOpcode() != ISD::DELETED_NODE)
19003         AddToWorklist(N);
19004       return SDValue(N, 0);
19005     }
19006   }
19007 
19008   // If this is a load followed by a store to the same location, then the store
19009   // is dead/noop.
19010   // TODO: Can relax for unordered atomics (see D66309)
19011   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
19012     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
19013         ST->isUnindexed() && ST->isSimple() &&
19014         Ld->getAddressSpace() == ST->getAddressSpace() &&
19015         // There can't be any side effects between the load and store, such as
19016         // a call or store.
19017         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
19018       // The store is dead, remove it.
19019       return Chain;
19020     }
19021   }
19022 
19023   // TODO: Can relax for unordered atomics (see D66309)
19024   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
19025     if (ST->isUnindexed() && ST->isSimple() &&
19026         ST1->isUnindexed() && ST1->isSimple()) {
19027       if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr &&
19028           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
19029           ST->getAddressSpace() == ST1->getAddressSpace()) {
19030         // If this is a store followed by a store with the same value to the
19031         // same location, then the store is dead/noop.
19032         return Chain;
19033       }
19034 
19035       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
19036           !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below require knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
19039           !ST->getMemoryVT().isScalableVector() &&
19040           !ST1->getMemoryVT().isScalableVector() &&
19041           ST->getAddressSpace() == ST1->getAddressSpace()) {
19042         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
19043         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
19044         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
19045         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store writes to a subset of the current store's
        // location and no other node is chained to that store, we can
        // effectively drop it. Do not remove stores to undef as they may be
        // used as data sinks.
19050         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
19051           CombineTo(ST1, ST1->getChain());
19052           return SDValue();
19053         }
19054       }
19055     }
19056   }
19057 
19058   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
19059   // truncating store.  We can do this even if this is already a truncstore.
19060   if ((Value.getOpcode() == ISD::FP_ROUND ||
19061        Value.getOpcode() == ISD::TRUNCATE) &&
19062       Value->hasOneUse() && ST->isUnindexed() &&
19063       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
19064                                ST->getMemoryVT(), LegalOperations)) {
19065     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
19066                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
19067   }
19068 
19069   // Always perform this optimization before types are legal. If the target
19070   // prefers, also try this after legalization to catch stores that were created
19071   // by intrinsics or other nodes.
19072   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
19073     while (true) {
19074       // There can be multiple store sequences on the same chain.
19075       // Keep trying to merge store sequences until we are unable to do so
19076       // or until we merge the last store on the chain.
19077       bool Changed = mergeConsecutiveStores(ST);
19078       if (!Changed) break;
      // Return N as the merge only uses CombineTo and no worklist
      // cleanup is necessary.
19081       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
19082         return SDValue(N, 0);
19083     }
19084   }
19085 
19086   // Try transforming N to an indexed store.
19087   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
19088     return SDValue(N, 0);
19089 
19090   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
19091   //
  // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // which would prevent their merging.
19095   if (isa<ConstantFPSDNode>(ST->getValue())) {
19096     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
19097       return NewSt;
19098   }
19099 
19100   if (SDValue NewSt = splitMergedValStore(ST))
19101     return NewSt;
19102 
19103   return ReduceLoadOpStoreWidth(N);
19104 }
19105 
19106 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
19107   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
19108   if (!LifetimeEnd->hasOffset())
19109     return SDValue();
19110 
19111   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
19112                                         LifetimeEnd->getOffset(), false);
19113 
19114   // We walk up the chains to find stores.
19115   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
19116   while (!Chains.empty()) {
19117     SDValue Chain = Chains.pop_back_val();
19118     if (!Chain.hasOneUse())
19119       continue;
19120     switch (Chain.getOpcode()) {
19121     case ISD::TokenFactor:
19122       for (unsigned Nops = Chain.getNumOperands(); Nops;)
19123         Chains.push_back(Chain.getOperand(--Nops));
19124       break;
19125     case ISD::LIFETIME_START:
19126     case ISD::LIFETIME_END:
19127       // We can forward past any lifetime start/end that can be proven not to
19128       // alias the node.
19129       if (!mayAlias(Chain.getNode(), N))
19130         Chains.push_back(Chain.getOperand(0));
19131       break;
19132     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
19134       // TODO: Can relax for unordered atomics (see D66309)
19135       if (!ST->isSimple() || ST->isIndexed())
19136         continue;
19137       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
19138       // The bounds of a scalable store are not known until runtime, so this
19139       // store cannot be elided.
19140       if (StoreSize.isScalable())
19141         continue;
19142       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
19143       // If we store purely within object bounds just before its lifetime ends,
19144       // we can remove the store.
19145       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
19146                                    StoreSize.getFixedSize() * 8)) {
19147         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
19148                    dbgs() << "\nwithin LIFETIME_END of : ";
19149                    LifetimeEndBase.dump(); dbgs() << "\n");
19150         CombineTo(ST, ST->getChain());
19151         return SDValue(N, 0);
19152       }
19153     }
19154     }
19155   }
19156   return SDValue();
19157 }
19158 
/// In the store instruction sequence below, the F and I values are bundled
/// together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
19162 /// which can remove the bitwise instructions or sink them to colder places.
19163 ///
19164 ///   (store (or (zext (bitcast F to i32) to i64),
19165 ///              (shl (zext I to i64), 32)), addr)  -->
19166 ///   (store F, addr) and (store I, addr+4)
19167 ///
/// Similarly, splitting other merged stores can also be beneficial, e.g.:
19169 /// For pair of {i32, i32}, i64 store --> two i32 stores.
19170 /// For pair of {i32, i16}, i64 store --> two i32 stores.
19171 /// For pair of {i16, i16}, i32 store --> two i16 stores.
19172 /// For pair of {i16, i8},  i32 store --> two i16 stores.
19173 /// For pair of {i8, i8},   i16 store --> two i8 stores.
19174 ///
19175 /// We allow each target to determine specifically which kind of splitting is
19176 /// supported.
19177 ///
/// The store patterns commonly arise from the simple code snippet below
/// when only std::make_pair(...) is SROA-transformed before being inlined
/// into hoo.
19180 ///   void goo(const std::pair<int, float> &);
19181 ///   hoo() {
19182 ///     ...
19183 ///     goo(std::make_pair(tmp, ftmp));
19184 ///     ...
19185 ///   }
19186 ///
19187 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
19188   if (OptLevel == CodeGenOpt::None)
19189     return SDValue();
19190 
19191   // Can't change the number of memory accesses for a volatile store or break
19192   // atomicity for an atomic one.
19193   if (!ST->isSimple())
19194     return SDValue();
19195 
19196   SDValue Val = ST->getValue();
19197   SDLoc DL(ST);
19198 
19199   // Match OR operand.
19200   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
19201     return SDValue();
19202 
19203   // Match SHL operand and get Lower and Higher parts of Val.
19204   SDValue Op1 = Val.getOperand(0);
19205   SDValue Op2 = Val.getOperand(1);
19206   SDValue Lo, Hi;
19207   if (Op1.getOpcode() != ISD::SHL) {
19208     std::swap(Op1, Op2);
19209     if (Op1.getOpcode() != ISD::SHL)
19210       return SDValue();
19211   }
19212   Lo = Op2;
19213   Hi = Op1.getOperand(0);
19214   if (!Op1.hasOneUse())
19215     return SDValue();
19216 
19217   // Match shift amount to HalfValBitSize.
19218   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
19219   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
19220   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
19221     return SDValue();
19222 
  // Lo and Hi must be zero-extended from scalar integers no wider than half
  // of Val's bit width.
19225   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
19226       !Lo.getOperand(0).getValueType().isScalarInteger() ||
19227       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
19228       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
19229       !Hi.getOperand(0).getValueType().isScalarInteger() ||
19230       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
19231     return SDValue();
19232 
19233   // Use the EVT of low and high parts before bitcast as the input
19234   // of target query.
19235   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
19236                   ? Lo.getOperand(0).getValueType()
19237                   : Lo.getValueType();
19238   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
19239                    ? Hi.getOperand(0).getValueType()
19240                    : Hi.getValueType();
19241   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
19242     return SDValue();
19243 
19244   // Start to split store.
19245   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
19246   AAMDNodes AAInfo = ST->getAAInfo();
19247 
19248   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
19249   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
19250   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
19251   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
19252 
19253   SDValue Chain = ST->getChain();
19254   SDValue Ptr = ST->getBasePtr();
19255   // Lower value store.
19256   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
19257                              ST->getOriginalAlign(), MMOFlags, AAInfo);
19258   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
19259   // Higher value store.
19260   SDValue St1 = DAG.getStore(
19261       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
19262       ST->getOriginalAlign(), MMOFlags, AAInfo);
19263   return St1;
19264 }
19265 
19266 /// Convert a disguised subvector insertion into a shuffle:
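/// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), I)
///   --> (vector_shuffle X, Y) with an updated mask, and
/// (insert_vector_elt V, (bitcast X from vector type), IdxC)
///   --> bitcast(shuffle (bitcast V), (extended X), Mask)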
19267 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
19268   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
19270   SDValue InsertVal = N->getOperand(1);
19271   SDValue Vec = N->getOperand(0);
19272 
19273   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
19274   // InsIndex)
19275   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
19276   //   CONCAT_VECTORS.
19277   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
19278       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19279       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
19280     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
19281     ArrayRef<int> Mask = SVN->getMask();
19282 
19283     SDValue X = Vec.getOperand(0);
19284     SDValue Y = Vec.getOperand(1);
19285 
    // Vec's operand 0 uses indices from 0 to N-1 and operand 1 uses indices
    // from N to 2N-1, where N is the number of elements in the vectors.
19289     SDValue InsertVal0 = InsertVal.getOperand(0);
19290     int ElementOffset = -1;
19291 
19292     // We explore the inputs of the shuffle in order to see if we find the
19293     // source of the extract_vector_elt. If so, we can use it to modify the
19294     // shuffle rather than perform an insert_vector_elt.
19295     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
19296     ArgWorkList.emplace_back(Mask.size(), Y);
19297     ArgWorkList.emplace_back(0, X);
19298 
19299     while (!ArgWorkList.empty()) {
19300       int ArgOffset;
19301       SDValue ArgVal;
19302       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
19303 
19304       if (ArgVal == InsertVal0) {
19305         ElementOffset = ArgOffset;
19306         break;
19307       }
19308 
19309       // Peek through concat_vector.
19310       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
19311         int CurrentArgOffset =
19312             ArgOffset + ArgVal.getValueType().getVectorNumElements();
19313         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
19314         for (SDValue Op : reverse(ArgVal->ops())) {
19315           CurrentArgOffset -= Step;
19316           ArgWorkList.emplace_back(CurrentArgOffset, Op);
19317         }
19318 
19319         // Make sure we went through all the elements and did not screw up index
19320         // computation.
19321         assert(CurrentArgOffset == ArgOffset);
19322       }
19323     }
19324 
19325     // If we failed to find a match, see if we can replace an UNDEF shuffle
19326     // operand.
19327     if (ElementOffset == -1 && Y.isUndef() &&
19328         InsertVal0.getValueType() == Y.getValueType()) {
19329       ElementOffset = Mask.size();
19330       Y = InsertVal0;
19331     }
19332 
19333     if (ElementOffset != -1) {
19334       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
19335 
19336       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
19337       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
19338       assert(NewMask[InsIndex] <
19339                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
19340              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
19341 
19342       SDValue LegalShuffle =
19343               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
19344                                           Y, NewMask, DAG);
19345       if (LegalShuffle)
19346         return LegalShuffle;
19347     }
19348   }
19349 
19350   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
19351   // bitcast(shuffle (bitcast V), (extended X), Mask)
19352   // Note: We do not use an insert_subvector node because that requires a
19353   // legal subvector type.
19354   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
19355       !InsertVal.getOperand(0).getValueType().isVector())
19356     return SDValue();
19357 
19358   SDValue SubVec = InsertVal.getOperand(0);
19359   SDValue DestVec = N->getOperand(0);
19360   EVT SubVecVT = SubVec.getValueType();
19361   EVT VT = DestVec.getValueType();
19362   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source only has a single vector element, the cost of widening it
  // into a vector is likely to exceed the cost of an insert_vector_elt.
19365   if (NumSrcElts == 1)
19366     return SDValue();
19367   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
19368   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
19369 
19370   // Step 1: Create a shuffle mask that implements this insert operation. The
19371   // vector that we are inserting into will be operand 0 of the shuffle, so
19372   // those elements are just 'i'. The inserted subvector is in the first
19373   // positions of operand 1 of the shuffle. Example:
19374   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
19375   SmallVector<int, 16> Mask(NumMaskVals);
19376   for (unsigned i = 0; i != NumMaskVals; ++i) {
19377     if (i / NumSrcElts == InsIndex)
19378       Mask[i] = (i % NumSrcElts) + NumMaskVals;
19379     else
19380       Mask[i] = i;
19381   }
19382 
  // Bail out if the target cannot handle the shuffle we want to create.
19384   EVT SubVecEltVT = SubVecVT.getVectorElementType();
19385   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
19386   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
19387     return SDValue();
19388 
19389   // Step 2: Create a wide vector from the inserted source vector by appending
19390   // undefined elements. This is the same size as our destination vector.
19391   SDLoc DL(N);
19392   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
19393   ConcatOps[0] = SubVec;
19394   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
19395 
19396   // Step 3: Shuffle in the padded subvector.
19397   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
19398   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
19399   AddToWorklist(PaddedSubV.getNode());
19400   AddToWorklist(DestVecBC.getNode());
19401   AddToWorklist(Shuf.getNode());
19402   return DAG.getBitcast(VT, Shuf);
19403 }
19404 
19405 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
19406   SDValue InVec = N->getOperand(0);
19407   SDValue InVal = N->getOperand(1);
19408   SDValue EltNo = N->getOperand(2);
19409   SDLoc DL(N);
19410 
19411   EVT VT = InVec.getValueType();
19412   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
19413 
19414   // Insert into out-of-bounds element is undefined.
19415   if (IndexC && VT.isFixedLengthVector() &&
19416       IndexC->getZExtValue() >= VT.getVectorNumElements())
19417     return DAG.getUNDEF(VT);
19418 
19419   // Remove redundant insertions:
19420   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
19421   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19422       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
19423     return InVec;
19424 
19425   if (!IndexC) {
    // If this is a variable insert into an undef vector, it might be better
    // to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
19428     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
19429       if (VT.isScalableVector())
19430         return DAG.getSplatVector(VT, DL, InVal);
19431 
19432       SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
19433       return DAG.getBuildVector(VT, DL, Ops);
19434     }
19435     return SDValue();
19436   }
19437 
19438   if (VT.isScalableVector())
19439     return SDValue();
19440 
19441   unsigned NumElts = VT.getVectorNumElements();
19442 
19443   // We must know which element is being inserted for folds below here.
19444   unsigned Elt = IndexC->getZExtValue();
19445 
19446   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
19447     return Shuf;
19448 
19449   // Handle <1 x ???> vector insertion special cases.
19450   if (VT.getVectorNumElements() == 1) {
19451     // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
19452     if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19453         InVal.getOperand(0).getValueType() == VT &&
19454         isNullConstant(InVal.getOperand(1)))
19455       return InVal.getOperand(0);
19456   }
19457 
19458   // Canonicalize insert_vector_elt dag nodes.
19459   // Example:
19460   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
19461   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
19462   //
  // Do this only if the child insert_vector_elt node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
19465   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
19466       && isa<ConstantSDNode>(InVec.getOperand(2))) {
19467     unsigned OtherElt = InVec.getConstantOperandVal(2);
19468     if (Elt < OtherElt) {
19469       // Swap nodes.
19470       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
19471                                   InVec.getOperand(0), InVal, EltNo);
19472       AddToWorklist(NewOp.getNode());
19473       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
19474                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
19475     }
19476   }
19477 
19478   // Attempt to fold the insertion into a legal BUILD_VECTOR.
19479   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
19480     auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
19481       assert(Ops.size() == NumElts && "Unexpected vector size");
19482 
19483       // Insert the element
19484       if (Elt < Ops.size()) {
19485         // All the operands of BUILD_VECTOR must have the same type;
19486         // we enforce that here.
19487         EVT OpVT = Ops[0].getValueType();
19488         Ops[Elt] =
19489             OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
19490       }
19491 
19492       // Return the new vector
19493       return DAG.getBuildVector(VT, DL, Ops);
19494     };
19495 
19496     // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
19497     // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
19498     // vector elements.
19499     SmallVector<SDValue, 8> Ops;
19500 
19501     // Do not combine these two vectors if the output vector will not replace
19502     // the input vector.
19503     if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
19504       Ops.append(InVec->op_begin(), InVec->op_end());
19505       return UpdateBuildVector(Ops);
19506     }
19507 
19508     if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) {
19509       Ops.push_back(InVec.getOperand(0));
19510       Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType()));
19511       return UpdateBuildVector(Ops);
19512     }
19513 
19514     if (InVec.isUndef()) {
19515       Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
19516       return UpdateBuildVector(Ops);
19517     }
19518 
    // If we're inserting into the end of a vector as part of a sequence, see
    // if we can create a BUILD_VECTOR by following the sequence back up the
    // chain.
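    // For example, for a v4 result:
    //   (insert_vector_elt (insert_vector_elt (insert_vector_elt
    //     (insert_vector_elt undef, a, 0), b, 1), c, 2), d, 3)
    //   --> (build_vector a, b, c, d)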
19522     if (Elt == (NumElts - 1)) {
19523       SmallVector<SDValue> ReverseInsertions;
19524       ReverseInsertions.push_back(InVal);
19525 
19526       EVT MaxEltVT = InVal.getValueType();
19527       SDValue CurVec = InVec;
19528       for (unsigned I = 1; I != NumElts; ++I) {
19529         if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse())
19530           break;
19531 
19532         auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2));
19533         if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I))
19534           break;
19535         SDValue CurVal = CurVec.getOperand(1);
19536         ReverseInsertions.push_back(CurVal);
19537         if (VT.isInteger()) {
19538           EVT CurValVT = CurVal.getValueType();
19539           MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT;
19540         }
19541         CurVec = CurVec.getOperand(0);
19542       }
19543 
19544       if (ReverseInsertions.size() == NumElts) {
19545         for (unsigned I = 0; I != NumElts; ++I) {
19546           SDValue Val = ReverseInsertions[(NumElts - 1) - I];
19547           Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val;
19548           Ops.push_back(Val);
19549         }
19550         return DAG.getBuildVector(VT, DL, Ops);
19551       }
19552     }
19553   }
19554 
19555   return SDValue();
19556 }
19557 
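/// Replace an extract_vector_elt of a loaded vector with a narrow scalar
/// load of just the extracted element, provided the narrower memory access
/// is legal and fast for the target.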
19558 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
19559                                                   SDValue EltNo,
19560                                                   LoadSDNode *OriginalLoad) {
19561   assert(OriginalLoad->isSimple());
19562 
19563   EVT ResultVT = EVE->getValueType(0);
19564   EVT VecEltVT = InVecVT.getVectorElementType();
19565 
19566   // If the vector element type is not a multiple of a byte then we are unable
19567   // to correctly compute an address to load only the extracted element as a
19568   // scalar.
19569   if (!VecEltVT.isByteSized())
19570     return SDValue();
19571 
19572   ISD::LoadExtType ExtTy =
19573       ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
19574   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
19575       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
19576     return SDValue();
19577 
19578   Align Alignment = OriginalLoad->getAlign();
19579   MachinePointerInfo MPI;
19580   SDLoc DL(EVE);
19581   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
19582     int Elt = ConstEltNo->getZExtValue();
19583     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
19584     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
19585     Alignment = commonAlignment(Alignment, PtrOff);
19586   } else {
19587     // Discard the pointer info except the address space because the memory
19588     // operand can't represent this new access since the offset is variable.
19589     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
19590     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
19591   }
19592 
19593   bool IsFast = false;
19594   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
19595                               OriginalLoad->getAddressSpace(), Alignment,
19596                               OriginalLoad->getMemOperand()->getFlags(),
19597                               &IsFast) ||
19598       !IsFast)
19599     return SDValue();
19600 
19601   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
19602                                                InVecVT, EltNo);
19603 
19604   // We are replacing a vector load with a scalar load. The new load must have
19605   // identical memory op ordering to the original.
19606   SDValue Load;
19607   if (ResultVT.bitsGT(VecEltVT)) {
19608     // If the result type of vextract is wider than the load, then issue an
19609     // extending load instead.
19610     ISD::LoadExtType ExtType =
19611         TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
19612                                                               : ISD::EXTLOAD;
19613     Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
19614                           NewPtr, MPI, VecEltVT, Alignment,
19615                           OriginalLoad->getMemOperand()->getFlags(),
19616                           OriginalLoad->getAAInfo());
19617     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19618   } else {
19619     // The result type is narrower or the same width as the vector element
19620     Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
19621                        Alignment, OriginalLoad->getMemOperand()->getFlags(),
19622                        OriginalLoad->getAAInfo());
19623     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
19624     if (ResultVT.bitsLT(VecEltVT))
19625       Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
19626     else
19627       Load = DAG.getBitcast(ResultVT, Load);
19628   }
19629   ++OpsNarrowed;
19630   return Load;
19631 }
19632 
19633 /// Transform a vector binary operation into a scalar binary operation by moving
19634 /// the math/logic after an extract element of a vector.
19635 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
19636                                        bool LegalOperations) {
19637   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19638   SDValue Vec = ExtElt->getOperand(0);
19639   SDValue Index = ExtElt->getOperand(1);
19640   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19641   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
19642       Vec->getNumValues() != 1)
19643     return SDValue();
19644 
19645   // Targets may want to avoid this to prevent an expensive register transfer.
19646   if (!TLI.shouldScalarizeBinop(Vec))
19647     return SDValue();
19648 
19649   // Extracting an element of a vector constant is constant-folded, so this
19650   // transform is just replacing a vector op with a scalar op while moving the
19651   // extract.
19652   SDValue Op0 = Vec.getOperand(0);
19653   SDValue Op1 = Vec.getOperand(1);
19654   if (isAnyConstantBuildVector(Op0, true) ||
19655       isAnyConstantBuildVector(Op1, true)) {
19656     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
19657     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
19658     SDLoc DL(ExtElt);
19659     EVT VT = ExtElt->getValueType(0);
19660     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
19661     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
19662     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
19663   }
19664 
19665   return SDValue();
19666 }
19667 
19668 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
19669   SDValue VecOp = N->getOperand(0);
19670   SDValue Index = N->getOperand(1);
19671   EVT ScalarVT = N->getValueType(0);
19672   EVT VecVT = VecOp.getValueType();
19673   if (VecOp.isUndef())
19674     return DAG.getUNDEF(ScalarVT);
19675 
  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
19677   //
19678   // This only really matters if the index is non-constant since other combines
19679   // on the constant elements already work.
19680   SDLoc DL(N);
19681   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
19682       Index == VecOp.getOperand(2)) {
19683     SDValue Elt = VecOp.getOperand(1);
19684     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
19685   }
19686 
  // (vextract (scalar_to_vector val), 0) -> val
19688   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19689     // Only 0'th element of SCALAR_TO_VECTOR is defined.
19690     if (DAG.isKnownNeverZero(Index))
19691       return DAG.getUNDEF(ScalarVT);
19692 
19693     // Check if the result type doesn't match the inserted element type. A
19694     // SCALAR_TO_VECTOR may truncate the inserted element and the
19695     // EXTRACT_VECTOR_ELT may widen the extracted vector.
19696     SDValue InOp = VecOp.getOperand(0);
19697     if (InOp.getValueType() != ScalarVT) {
19698       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger() &&
19699              InOp.getValueType().bitsGT(ScalarVT));
19700       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
19701     }
19702     return InOp;
19703   }
19704 
19705   // extract_vector_elt of out-of-bounds element -> UNDEF
19706   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19707   if (IndexC && VecVT.isFixedLengthVector() &&
19708       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
19709     return DAG.getUNDEF(ScalarVT);
19710 
19711   // extract_vector_elt (build_vector x, y), 1 -> y
19712   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
19713        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
19714       TLI.isTypeLegal(VecVT) &&
19715       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
19716     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
19717             VecVT.isFixedLengthVector()) &&
19718            "BUILD_VECTOR used for scalable vectors");
19719     unsigned IndexVal =
19720         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
19721     SDValue Elt = VecOp.getOperand(IndexVal);
19722     EVT InEltVT = Elt.getValueType();
19723 
    // Sometimes a build_vector's scalar input types do not match the result
    // type.
19725     if (ScalarVT == InEltVT)
19726       return Elt;
19727 
19728     // TODO: It may be useful to truncate if free if the build_vector implicitly
19729     // converts.
19730   }
19731 
19732   if (VecVT.isScalableVector())
19733     return SDValue();
19734 
19735   // All the code from this point onwards assumes fixed width vectors, but it's
19736   // possible that some of the combinations could be made to work for scalable
19737   // vectors too.
19738   unsigned NumElts = VecVT.getVectorNumElements();
19739   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
19740 
19741   // TODO: These transforms should not require the 'hasOneUse' restriction, but
19742   // there are regressions on multiple targets without it. We can end up with a
19743   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
19744   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
19745       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
19747     bool IsLE = DAG.getDataLayout().isLittleEndian();
19748     unsigned ExtractIndex = IndexC->getZExtValue();
19749     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
19750     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
19751     SDValue BCSrc = VecOp.getOperand(0);
19752     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
19753       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
19754 
19755     if (LegalTypes && BCSrc.getValueType().isInteger() &&
19756         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
19757       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
19758       // trunc i64 X to i32
19759       SDValue X = BCSrc.getOperand(0);
19760       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
19761              "Extract element and scalar to vector can't change element type "
19762              "from FP to integer.");
19763       unsigned XBitWidth = X.getValueSizeInBits();
19764       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
19765 
19766       // An extract element return value type can be wider than its vector
19767       // operand element type. In that case, the high bits are undefined, so
19768       // it's possible that we may need to extend rather than truncate.
19769       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
19770         assert(XBitWidth % VecEltBitWidth == 0 &&
19771                "Scalar bitwidth must be a multiple of vector element bitwidth");
19772         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
19773       }
19774     }
19775   }
19776 
19777   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
19778     return BO;
19779 
  // Transform: (EXTRACT_VECTOR_ELT (VECTOR_SHUFFLE)) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example, on AVX, extracting elements from a wide vector
  // without using extract_subvector is not supported. However, if we can find
  // an underlying scalar value, then we can always use that.
19786   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
19787     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
19788     // Find the new index to extract from.
19789     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
19790 
19791     // Extracting an undef index is undef.
19792     if (OrigElt == -1)
19793       return DAG.getUNDEF(ScalarVT);
19794 
19795     // Select the right vector half to extract from.
19796     SDValue SVInVec;
19797     if (OrigElt < (int)NumElts) {
19798       SVInVec = VecOp.getOperand(0);
19799     } else {
19800       SVInVec = VecOp.getOperand(1);
19801       OrigElt -= NumElts;
19802     }
19803 
19804     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
19805       SDValue InOp = SVInVec.getOperand(OrigElt);
19806       if (InOp.getValueType() != ScalarVT) {
19807         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
19808         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
19809       }
19810 
19811       return InOp;
19812     }
19813 
19814     // FIXME: We should handle recursing on other vector shuffles and
19815     // scalar_to_vector here as well.
19816 
19817     if (!LegalOperations ||
19818         // FIXME: Should really be just isOperationLegalOrCustom.
19819         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
19820         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
19821       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
19822                          DAG.getVectorIdxConstant(OrigElt, DL));
19823     }
19824   }
19825 
19826   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
19827   // simplify it based on the (valid) extraction indices.
19828   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
19829         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19830                Use->getOperand(0) == VecOp &&
19831                isa<ConstantSDNode>(Use->getOperand(1));
19832       })) {
19833     APInt DemandedElts = APInt::getZero(NumElts);
19834     for (SDNode *Use : VecOp->uses()) {
19835       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
19836       if (CstElt->getAPIntValue().ult(NumElts))
19837         DemandedElts.setBit(CstElt->getZExtValue());
19838     }
19839     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
19840       // We simplified the vector operand of this extract element. If this
19841       // extract is not dead, visit it again so it is folded properly.
19842       if (N->getOpcode() != ISD::DELETED_NODE)
19843         AddToWorklist(N);
19844       return SDValue(N, 0);
19845     }
19846     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
19847     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
19848       // We simplified the vector operand of this extract element. If this
19849       // extract is not dead, visit it again so it is folded properly.
19850       if (N->getOpcode() != ISD::DELETED_NODE)
19851         AddToWorklist(N);
19852       return SDValue(N, 0);
19853     }
19854   }
19855 
  // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
19859   bool BCNumEltsChanged = false;
19860   EVT ExtVT = VecVT.getVectorElementType();
19861   EVT LVT = ExtVT;
19862   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
19863     return SDValue();
19864 
19865   if (VecOp.getOpcode() == ISD::BITCAST) {
19866     // Don't duplicate a load with other uses.
19867     if (!VecOp.hasOneUse())
19868       return SDValue();
19869 
19870     EVT BCVT = VecOp.getOperand(0).getValueType();
19871     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
19872       return SDValue();
19873     if (NumElts != BCVT.getVectorNumElements())
19874       BCNumEltsChanged = true;
19875     VecOp = VecOp.getOperand(0);
19876     ExtVT = BCVT.getVectorElementType();
19877   }
19878 
19879   // extract (vector load $addr), i --> load $addr + i * size
19880   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
19881       ISD::isNormalLoad(VecOp.getNode()) &&
19882       !Index->hasPredecessor(VecOp.getNode())) {
19883     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
19884     if (VecLoad && VecLoad->isSimple())
19885       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
19886   }
19887 
19888   // Perform only after legalization to ensure build_vector / vector_shuffle
19889   // optimizations have already been done.
19890   if (!LegalOperations || !IndexC)
19891     return SDValue();
19892 
19893   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
19894   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
19895   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
19896   int Elt = IndexC->getZExtValue();
19897   LoadSDNode *LN0 = nullptr;
19898   if (ISD::isNormalLoad(VecOp.getNode())) {
19899     LN0 = cast<LoadSDNode>(VecOp);
19900   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19901              VecOp.getOperand(0).getValueType() == ExtVT &&
19902              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
19903     // Don't duplicate a load with other uses.
19904     if (!VecOp.hasOneUse())
19905       return SDValue();
19906 
19907     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
19908   }
19909   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
19910     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
19911     // =>
19912     // (load $addr+1*size)
19913 
19914     // Don't duplicate a load with other uses.
19915     if (!VecOp.hasOneUse())
19916       return SDValue();
19917 
19918     // If the bit convert changed the number of elements, it is unsafe
19919     // to examine the mask.
19920     if (BCNumEltsChanged)
19921       return SDValue();
19922 
    // Select the input vector, guarding against an out-of-range extract index.
19924     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
19925     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
19926 
19927     if (VecOp.getOpcode() == ISD::BITCAST) {
19928       // Don't duplicate a load with other uses.
19929       if (!VecOp.hasOneUse())
19930         return SDValue();
19931 
19932       VecOp = VecOp.getOperand(0);
19933     }
19934     if (ISD::isNormalLoad(VecOp.getNode())) {
19935       LN0 = cast<LoadSDNode>(VecOp);
19936       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
19937       Index = DAG.getConstant(Elt, DL, Index.getValueType());
19938     }
19939   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
19940              VecVT.getVectorElementType() == ScalarVT &&
19941              (!LegalTypes ||
19942               TLI.isTypeLegal(
19943                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
19944     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
19945     //      -> extract_vector_elt a, 0
19946     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
19947     //      -> extract_vector_elt a, 1
19948     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
19949     //      -> extract_vector_elt b, 0
19950     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
19951     //      -> extract_vector_elt b, 1
19952     SDLoc SL(N);
19953     EVT ConcatVT = VecOp.getOperand(0).getValueType();
19954     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
19955     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
19956                                      Index.getValueType());
19957 
19958     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
19959     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
19960                               ConcatVT.getVectorElementType(),
19961                               ConcatOp, NewIdx);
19962     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
19963   }
19964 
  // Make sure we found a simple (non-volatile, non-atomic) load and the
  // extractelement is the only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
19968     return SDValue();
19969 
19970   // If Idx was -1 above, Elt is going to be -1, so just return undef.
19971   if (Elt == -1)
19972     return DAG.getUNDEF(LVT);
19973 
19974   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
19975 }
19976 
// Simplify (build_vec (ext x)) to (bitcast (build_vec x))
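// e.g. on little-endian targets:
//   (v4i32 build_vector (i32 zext i16:a), (i32 zext i16:b),
//                       (i32 zext i16:c), (i32 zext i16:d))
//     --> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))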
19978 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
19979   // We perform this optimization post type-legalization because
19980   // the type-legalizer often scalarizes integer-promoted vectors.
19981   // Performing this optimization before may create bit-casts which
19982   // will be type-legalized to complex code sequences.
19983   // We perform this optimization only before the operation legalizer because we
19984   // may introduce illegal operations.
19985   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
19986     return SDValue();
19987 
19988   unsigned NumInScalars = N->getNumOperands();
19989   SDLoc DL(N);
19990   EVT VT = N->getValueType(0);
19991 
19992   // Check to see if this is a BUILD_VECTOR of a bunch of values
19993   // which come from any_extend or zero_extend nodes. If so, we can create
19994   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
19995   // optimizations. We do not handle sign-extend because we can't fill the sign
19996   // using shuffles.
19997   EVT SourceType = MVT::Other;
19998   bool AllAnyExt = true;
19999 
20000   for (unsigned i = 0; i != NumInScalars; ++i) {
20001     SDValue In = N->getOperand(i);
20002     // Ignore undef inputs.
20003     if (In.isUndef()) continue;
20004 
20005     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
20006     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
20007 
20008     // Abort if the element is not an extension.
20009     if (!ZeroExt && !AnyExt) {
20010       SourceType = MVT::Other;
20011       break;
20012     }
20013 
20014     // The input is a ZeroExt or AnyExt. Check the original type.
20015     EVT InTy = In.getOperand(0).getValueType();
20016 
20017     // Check that all of the widened source types are the same.
20018     if (SourceType == MVT::Other)
20019       // First time.
20020       SourceType = InTy;
20021     else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
20023       SourceType = MVT::Other;
20024       break;
20025     }
20026 
20027     // Check if all of the extends are ANY_EXTENDs.
20028     AllAnyExt &= AnyExt;
20029   }
20030 
20031   // In order to have valid types, all of the inputs must be extended from the
20032   // same source type and all of the inputs must be any or zero extend.
20033   // Scalar sizes must be a power of two.
20034   EVT OutScalarTy = VT.getScalarType();
20035   bool ValidTypes = SourceType != MVT::Other &&
20036                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
20037                  isPowerOf2_32(SourceType.getSizeInBits());
20038 
20039   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
20040   // turn into a single shuffle instruction.
20041   if (!ValidTypes)
20042     return SDValue();
20043 
20044   // If we already have a splat buildvector, then don't fold it if it means
20045   // introducing zeros.
20046   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
20047     return SDValue();
20048 
20049   bool isLE = DAG.getDataLayout().isLittleEndian();
20050   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
20051   assert(ElemRatio > 1 && "Invalid element size ratio");
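  // For zero-extends the extra source elements must be zero; for any-extends
  // they can be left undef.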
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType)
                             : DAG.getConstant(0, DL, SourceType);
20054 
20055   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
20056   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
20057 
20058   // Populate the new build_vector
20059   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20060     SDValue Cast = N->getOperand(i);
20061     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
20062             Cast.getOpcode() == ISD::ZERO_EXTEND ||
20063             Cast.isUndef()) && "Invalid cast opcode");
20064     SDValue In;
20065     if (Cast.isUndef())
20066       In = DAG.getUNDEF(SourceType);
20067     else
20068       In = Cast->getOperand(0);
20069     unsigned Index = isLE ? (i * ElemRatio) :
20070                             (i * ElemRatio + (ElemRatio - 1));
20071 
20072     assert(Index < Ops.size() && "Invalid index");
20073     Ops[Index] = In;
20074   }
20075 
20076   // The type of the new BUILD_VECTOR node.
20077   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
20078   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
20079          "Invalid vector size");
20080   // Check if the new vector type is legal.
20081   if (!isTypeLegal(VecVT) ||
20082       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
20083        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
20084     return SDValue();
20085 
20086   // Make the new BUILD_VECTOR.
20087   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
20088 
20089   // The new BUILD_VECTOR node has the potential to be further optimized.
20090   AddToWorklist(BV.getNode());
20091   // Bitcast to the desired type.
20092   return DAG.getBitcast(VT, BV);
20093 }
20094 
20095 // Simplify (build_vec (trunc $1)
20096 //                     (trunc (srl $1 half-width))
20097 //                     (trunc (srl $1 (2 * half-width))) …)
20098 // to (bitcast $1)
20099 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
20100   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
20101 
20102   // Only for little endian
20103   if (!DAG.getDataLayout().isLittleEndian())
20104     return SDValue();
20105 
20106   SDLoc DL(N);
20107   EVT VT = N->getValueType(0);
20108   EVT OutScalarTy = VT.getScalarType();
20109   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
20110 
  // Only handle power-of-two scalar sizes so the bitcast is well-defined.
20112   if (!isPowerOf2_64(ScalarTypeBitsize))
20113     return SDValue();
20114 
20115   unsigned NumInScalars = N->getNumOperands();
20116 
20117   // Look through bitcasts
20118   auto PeekThroughBitcast = [](SDValue Op) {
20119     if (Op.getOpcode() == ISD::BITCAST)
20120       return Op.getOperand(0);
20121     return Op;
20122   };
20123 
20124   // The source value where all the parts are extracted.
20125   SDValue Src;
20126   for (unsigned i = 0; i != NumInScalars; ++i) {
20127     SDValue In = PeekThroughBitcast(N->getOperand(i));
20128     // Ignore undef inputs.
20129     if (In.isUndef()) continue;
20130 
20131     if (In.getOpcode() != ISD::TRUNCATE)
20132       return SDValue();
20133 
20134     In = PeekThroughBitcast(In.getOperand(0));
20135 
20136     if (In.getOpcode() != ISD::SRL) {
      // For now only handle build_vectors whose parts appear in source
      // order (no shuffling); other shift patterns can be handled here in
      // the future.
20139       if (i != 0)
20140         return SDValue();
20141 
20142       Src = In;
20143     } else {
20144       // In is SRL
20145       SDValue part = PeekThroughBitcast(In.getOperand(0));
20146 
20147       if (!Src) {
20148         Src = part;
20149       } else if (Src != part) {
20150         // Vector parts do not stem from the same variable
20151         return SDValue();
20152       }
20153 
20154       SDValue ShiftAmtVal = In.getOperand(1);
20155       if (!isa<ConstantSDNode>(ShiftAmtVal))
20156         return SDValue();
20157 
20158       uint64_t ShiftAmt = In.getConstantOperandVal(1);
20159 
20160       // The extracted value is not extracted at the right position
20161       if (ShiftAmt != i * ScalarTypeBitsize)
20162         return SDValue();
20163     }
20164   }
20165 
20166   // Only cast if the size is the same
20167   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
20168     return SDValue();
20169 
20170   return DAG.getBitcast(VT, Src);
20171 }
20172 
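/// Try to produce the BUILD_VECTOR node N as a vector_shuffle of the two
/// inputs VecIn1 and VecIn2. VectorMask holds, for each element of N, the
/// number of the input vector that element comes from (see
/// reduceBuildVecToShuffle); LeftIdx identifies the pair of input vectors
/// (LeftIdx, LeftIdx + 1) handled by this call, and DidSplitVec indicates
/// that both inputs come from a single split source vector.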
20173 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
20174                                            ArrayRef<int> VectorMask,
20175                                            SDValue VecIn1, SDValue VecIn2,
20176                                            unsigned LeftIdx, bool DidSplitVec) {
20177   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
20178 
20179   EVT VT = N->getValueType(0);
20180   EVT InVT1 = VecIn1.getValueType();
20181   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
20182 
20183   unsigned NumElems = VT.getVectorNumElements();
20184   unsigned ShuffleNumElems = NumElems;
20185 
20186   // If we artificially split a vector in two already, then the offsets in the
20187   // operands will all be based off of VecIn1, even those in VecIn2.
20188   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
20189 
20190   uint64_t VTSize = VT.getFixedSizeInBits();
20191   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
20192   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
20193 
20194   assert(InVT2Size <= InVT1Size &&
20195          "Inputs must be sorted to be in non-increasing vector size order.");
20196 
20197   // We can't generate a shuffle node with mismatched input and output types.
20198   // Try to make the types match the type of the output.
20199   if (InVT1 != VT || InVT2 != VT) {
20200     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
20201       // If the output vector length is a multiple of both input lengths,
20202       // we can concatenate them and pad the rest with undefs.
20203       unsigned NumConcats = VTSize / InVT1Size;
20204       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
20205       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
20206       ConcatOps[0] = VecIn1;
20207       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
20208       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20209       VecIn2 = SDValue();
20210     } else if (InVT1Size == VTSize * 2) {
20211       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
20212         return SDValue();
20213 
20214       if (!VecIn2.getNode()) {
20215         // If we only have one input vector, and it's twice the size of the
20216         // output, split it in two.
20217         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
20218                              DAG.getVectorIdxConstant(NumElems, DL));
20219         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
20220         // Since we now have shorter input vectors, adjust the offset of the
20221         // second vector's start.
20222         Vec2Offset = NumElems;
20223       } else {
20224         assert(InVT2Size <= InVT1Size &&
20225                "Second input is not going to be larger than the first one.");
20226 
20227         // VecIn1 is wider than the output, and we have another, possibly
20228         // smaller input. Pad the smaller input with undefs, shuffle at the
20229         // input vector width, and extract the output.
20230         // The shuffle type is different than VT, so check legality again.
20231         if (LegalOperations &&
20232             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
20233           return SDValue();
20234 
20235         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
20236         // lower it back into a BUILD_VECTOR. So if the inserted type is
20237         // illegal, don't even try.
20238         if (InVT1 != InVT2) {
20239           if (!TLI.isTypeLegal(InVT2))
20240             return SDValue();
20241           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
20242                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
20243         }
20244         ShuffleNumElems = NumElems * 2;
20245       }
20246     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
20247       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
20248       ConcatOps[0] = VecIn2;
20249       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
20250     } else {
20251       // TODO: Support cases where the length mismatch isn't exactly by a
20252       // factor of 2.
20253       // TODO: Move this check upwards, so that if we have bad type
20254       // mismatches, we don't create any DAG nodes.
20255       return SDValue();
20256     }
20257   }
20258 
20259   // Initialize mask to undef.
20260   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
20261 
20262   // Only need to run up to the number of elements actually used, not the
20263   // total number of elements in the shuffle - if we are shuffling a wider
20264   // vector, the high lanes should be set to undef.
20265   for (unsigned i = 0; i != NumElems; ++i) {
20266     if (VectorMask[i] <= 0)
20267       continue;
20268 
20269     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
20270     if (VectorMask[i] == (int)LeftIdx) {
20271       Mask[i] = ExtIndex;
20272     } else if (VectorMask[i] == (int)LeftIdx + 1) {
20273       Mask[i] = Vec2Offset + ExtIndex;
20274     }
20275   }
20276 
  // The types of the input vectors may have changed above.
20278   InVT1 = VecIn1.getValueType();
20279 
20280   // If we already have a VecIn2, it should have the same type as VecIn1.
20281   // If we don't, get an undef/zero vector of the appropriate type.
20282   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
20283   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
20284 
20285   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
20286   if (ShuffleNumElems > NumElems)
20287     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
20288 
20289   return Shuffle;
20290 }
20291 
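/// If a BUILD_VECTOR is all undef except for a single element that
/// zero-extends a value extracted from a vector, turn it into a shuffle of
/// the extract's source vector with a zero vector.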
20292 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
20293   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
20294 
20295   // First, determine where the build vector is not undef.
20296   // TODO: We could extend this to handle zero elements as well as undefs.
20297   int NumBVOps = BV->getNumOperands();
20298   int ZextElt = -1;
20299   for (int i = 0; i != NumBVOps; ++i) {
20300     SDValue Op = BV->getOperand(i);
20301     if (Op.isUndef())
20302       continue;
20303     if (ZextElt == -1)
20304       ZextElt = i;
20305     else
20306       return SDValue();
20307   }
20308   // Bail out if there's no non-undef element.
20309   if (ZextElt == -1)
20310     return SDValue();
20311 
20312   // The build vector contains some number of undef elements and exactly
20313   // one other element. That other element must be a zero-extended scalar
20314   // extracted from a vector at a constant index to turn this into a shuffle.
20315   // Also, require that the build vector does not implicitly truncate/extend
20316   // its elements.
20317   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
20318   EVT VT = BV->getValueType(0);
20319   SDValue Zext = BV->getOperand(ZextElt);
20320   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
20321       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20322       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
20323       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
20324     return SDValue();
20325 
20326   // The zero-extend must be a multiple of the source size, and we must be
20327   // building a vector of the same size as the source of the extract element.
20328   SDValue Extract = Zext.getOperand(0);
20329   unsigned DestSize = Zext.getValueSizeInBits();
20330   unsigned SrcSize = Extract.getValueSizeInBits();
20331   if (DestSize % SrcSize != 0 ||
20332       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
20333     return SDValue();
20334 
20335   // Create a shuffle mask that will combine the extracted element with zeros
20336   // and undefs.
20337   int ZextRatio = DestSize / SrcSize;
20338   int NumMaskElts = NumBVOps * ZextRatio;
20339   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
20340   for (int i = 0; i != NumMaskElts; ++i) {
20341     if (i / ZextRatio == ZextElt) {
20342       // The low bits of the (potentially translated) extracted element map to
20343       // the source vector. The high bits map to zero. We will use a zero vector
20344       // as the 2nd source operand of the shuffle, so use the 1st element of
20345       // that vector (mask value is number-of-elements) for the high bits.
20346       if (i % ZextRatio == 0)
20347         ShufMask[i] = Extract.getConstantOperandVal(1);
20348       else
20349         ShufMask[i] = NumMaskElts;
20350     }
20351 
20352     // Undef elements of the build vector remain undef because we initialize
20353     // the shuffle mask with -1.
20354   }
20355 
20356   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
20357   // bitcast (shuffle V, ZeroVec, VectorMask)
20358   SDLoc DL(BV);
20359   EVT VecVT = Extract.getOperand(0).getValueType();
20360   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
20361   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20362   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
20363                                              ZeroVec, ShufMask, DAG);
20364   if (!Shuf)
20365     return SDValue();
20366   return DAG.getBitcast(VT, Shuf);
20367 }
20368 
20369 // FIXME: promote to STLExtras.
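// Returns the index of the first occurrence of Val in Range, or -1 if Val is
// not present.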
20370 template <typename R, typename T>
20371 static auto getFirstIndexOf(R &&Range, const T &Val) {
20372   auto I = find(Range, Val);
20373   if (I == Range.end())
20374     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
20375   return std::distance(Range.begin(), I);
20376 }
20377 
20378 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
20379 // operations. If the types of the vectors we're extracting from allow it,
20380 // turn this into a vector_shuffle node.
20381 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
20382   SDLoc DL(N);
20383   EVT VT = N->getValueType(0);
20384 
20385   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
20386   if (!isTypeLegal(VT))
20387     return SDValue();
20388 
20389   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
20390     return V;
20391 
20392   // May only combine to shuffle after legalize if shuffle is legal.
20393   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
20394     return SDValue();
20395 
20396   bool UsesZeroVector = false;
20397   unsigned NumElems = N->getNumOperands();
20398 
20399   // Record, for each element of the newly built vector, which input vector
20400   // that element comes from. -1 stands for undef, 0 for the zero vector,
20401   // and positive values for the input vectors.
20402   // VectorMask maps each element to its vector number, and VecIn maps vector
20403   // numbers to their initial SDValues.
20404 
20405   SmallVector<int, 8> VectorMask(NumElems, -1);
20406   SmallVector<SDValue, 8> VecIn;
20407   VecIn.push_back(SDValue());
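  // Index 0 in VecIn is reserved for the zero vector, matching the 0 entries
  // in VectorMask, so real input vectors start at index 1.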
20408 
20409   for (unsigned i = 0; i != NumElems; ++i) {
20410     SDValue Op = N->getOperand(i);
20411 
20412     if (Op.isUndef())
20413       continue;
20414 
20415     // See if we can use a blend with a zero vector.
20416     // TODO: Should we generalize this to a blend with an arbitrary constant
20417     // vector?
20418     if (isNullConstant(Op) || isNullFPConstant(Op)) {
20419       UsesZeroVector = true;
20420       VectorMask[i] = 0;
20421       continue;
20422     }
20423 
20424     // Not an undef or zero. If the input is something other than an
20425     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
20426     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
20427         !isa<ConstantSDNode>(Op.getOperand(1)))
20428       return SDValue();
20429     SDValue ExtractedFromVec = Op.getOperand(0);
20430 
20431     if (ExtractedFromVec.getValueType().isScalableVector())
20432       return SDValue();
20433 
20434     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
20435     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
20436       return SDValue();
20437 
20438     // All inputs must have the same element type as the output.
20439     if (VT.getVectorElementType() !=
20440         ExtractedFromVec.getValueType().getVectorElementType())
20441       return SDValue();
20442 
20443     // Have we seen this input vector before?
20444     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
20445     // a map back from SDValues to numbers isn't worth it.
20446     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
20447     if (Idx == -1) { // A new source vector?
20448       Idx = VecIn.size();
20449       VecIn.push_back(ExtractedFromVec);
20450     }
20451 
20452     VectorMask[i] = Idx;
20453   }
20454 
20455   // If we didn't find at least one input vector, bail out.
20456   if (VecIn.size() < 2)
20457     return SDValue();
20458 
  // If all the operands of the BUILD_VECTOR extract from the same
  // vector, then split that vector efficiently based on the maximum
  // vector access index and adjust VectorMask and VecIn accordingly.
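  // For example (a hypothetical case): a v4i8 build vector whose elements
  // are extracted from a single v16i8 source at indices up to 13 gives
  // MaxIndex == 13 and NearestPow2 == 16, so the source is split into two
  // v8i8 halves and the VectorMask entries are remapped to sources 1 and 2.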
20463   bool DidSplitVec = false;
20464   if (VecIn.size() == 2) {
20465     unsigned MaxIndex = 0;
20466     unsigned NearestPow2 = 0;
20467     SDValue Vec = VecIn.back();
20468     EVT InVT = Vec.getValueType();
20469     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
20470 
20471     for (unsigned i = 0; i < NumElems; i++) {
20472       if (VectorMask[i] <= 0)
20473         continue;
20474       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
20475       IndexVec[i] = Index;
20476       MaxIndex = std::max(MaxIndex, Index);
20477     }
20478 
20479     NearestPow2 = PowerOf2Ceil(MaxIndex);
20480     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
20481         NumElems * 2 < NearestPow2) {
20482       unsigned SplitSize = NearestPow2 / 2;
20483       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
20484                                      InVT.getVectorElementType(), SplitSize);
20485       if (TLI.isTypeLegal(SplitVT) &&
20486           SplitSize + SplitVT.getVectorNumElements() <=
20487               InVT.getVectorNumElements()) {
20488         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20489                                      DAG.getVectorIdxConstant(SplitSize, DL));
20490         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
20491                                      DAG.getVectorIdxConstant(0, DL));
20492         VecIn.pop_back();
20493         VecIn.push_back(VecIn1);
20494         VecIn.push_back(VecIn2);
20495         DidSplitVec = true;
20496 
20497         for (unsigned i = 0; i < NumElems; i++) {
20498           if (VectorMask[i] <= 0)
20499             continue;
20500           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
20501         }
20502       }
20503     }
20504   }
20505 
20506   // Sort input vectors by decreasing vector element count,
20507   // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit" zero vector as-is.
20509   SmallVector<SDValue, 8> SortedVecIn(VecIn);
20510   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
20511                     [](const SDValue &a, const SDValue &b) {
20512                       return a.getValueType().getVectorNumElements() >
20513                              b.getValueType().getVectorNumElements();
20514                     });
20515 
  // We now also need to rebuild VectorMask, because its entries index into
  // VecIn, and we just sorted VecIn.
20518   for (int &SourceVectorIndex : VectorMask) {
20519     if (SourceVectorIndex <= 0)
20520       continue;
20521     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
20522     assert(Idx > 0 && Idx < SortedVecIn.size() &&
20523            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
20524     SourceVectorIndex = Idx;
20525   }
20526 
20527   VecIn = std::move(SortedVecIn);
20528 
  // TODO: Should this fire if some of the input vectors have illegal types
  // (like it does now), or should we let legalization run its course first?
20531 
20532   // Shuffle phase:
20533   // Take pairs of vectors, and shuffle them so that the result has elements
20534   // from these vectors in the correct places.
20535   // For example, given:
20536   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
20537   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
20538   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
20539   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
20540   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
20541   // We will generate:
20542   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
20543   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
20544   SmallVector<SDValue, 4> Shuffles;
20545   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
20546     unsigned LeftIdx = 2 * In + 1;
20547     SDValue VecLeft = VecIn[LeftIdx];
20548     SDValue VecRight =
20549         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
20550 
20551     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
20552                                                 VecRight, LeftIdx, DidSplitVec))
20553       Shuffles.push_back(Shuffle);
20554     else
20555       return SDValue();
20556   }
20557 
20558   // If we need the zero vector as an "ingredient" in the blend tree, add it
20559   // to the list of shuffles.
20560   if (UsesZeroVector)
20561     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
20562                                       : DAG.getConstantFP(0.0, DL, VT));
20563 
20564   // If we only have one shuffle, we're done.
20565   if (Shuffles.size() == 1)
20566     return Shuffles[0];
20567 
20568   // Update the vector mask to point to the post-shuffle vectors.
20569   for (int &Vec : VectorMask)
20570     if (Vec == 0)
20571       Vec = Shuffles.size() - 1;
20572     else
20573       Vec = (Vec - 1) / 2;
20574 
20575   // More than one shuffle. Generate a binary tree of blends, e.g. if from
20576   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
20577   // generate:
20578   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
20579   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
20580   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
20581   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
20582   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
20583   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
20584   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
20585 
20586   // Make sure the initial size of the shuffle list is even.
20587   if (Shuffles.size() % 2)
20588     Shuffles.push_back(DAG.getUNDEF(VT));
20589 
20590   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
20591     if (CurSize % 2) {
20592       Shuffles[CurSize] = DAG.getUNDEF(VT);
20593       CurSize++;
20594     }
20595     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
20596       int Left = 2 * In;
20597       int Right = 2 * In + 1;
20598       SmallVector<int, 8> Mask(NumElems, -1);
20599       SDValue L = Shuffles[Left];
20600       ArrayRef<int> LMask;
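      // If a blend input is itself a one-operand shuffle with no other uses,
      // fold its mask into the new blend mask below instead of emitting a
      // nested shuffle. Freshly created shuffles from the phase above
      // typically have no uses yet, so this commonly applies.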
20601       bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
20602                            L.use_empty() && L.getOperand(1).isUndef() &&
20603                            L.getOperand(0).getValueType() == L.getValueType();
20604       if (IsLeftShuffle) {
20605         LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
20606         L = L.getOperand(0);
20607       }
20608       SDValue R = Shuffles[Right];
20609       ArrayRef<int> RMask;
20610       bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
20611                             R.use_empty() && R.getOperand(1).isUndef() &&
20612                             R.getOperand(0).getValueType() == R.getValueType();
20613       if (IsRightShuffle) {
20614         RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
20615         R = R.getOperand(0);
20616       }
20617       for (unsigned I = 0; I != NumElems; ++I) {
20618         if (VectorMask[I] == Left) {
20619           Mask[I] = I;
20620           if (IsLeftShuffle)
20621             Mask[I] = LMask[I];
20622           VectorMask[I] = In;
20623         } else if (VectorMask[I] == Right) {
20624           Mask[I] = I + NumElems;
20625           if (IsRightShuffle)
20626             Mask[I] = RMask[I] + NumElems;
20627           VectorMask[I] = In;
20628         }
20629       }
20630 
20631       Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
20632     }
20633   }
20634   return Shuffles[0];
20635 }
20636 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
20639 // TODO: Support sign extend?
20640 // TODO: Allow undef elements?
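// For example (hypothetical types):
//   (v4i32 build_vector (zext (extractelt (v8i16 X), 4)), ...,
//                       (zext (extractelt (v8i16 X), 7))) -->
//   (v4i32 zext (v4i16 extract_subvector X, 4))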
20641 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
20642   if (LegalOperations)
20643     return SDValue();
20644 
20645   EVT VT = N->getValueType(0);
20646 
20647   bool FoundZeroExtend = false;
20648   SDValue Op0 = N->getOperand(0);
20649   auto checkElem = [&](SDValue Op) -> int64_t {
20650     unsigned Opc = Op.getOpcode();
20651     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
20652     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
20653         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20654         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
20655       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
20656         return C->getZExtValue();
20657     return -1;
20658   };
20659 
20660   // Make sure the first element matches
20661   // (zext (extract_vector_elt X, C))
  // The offset must be a constant multiple of the
  // number of elements in the result type.
20664   int64_t Offset = checkElem(Op0);
20665   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
20666     return SDValue();
20667 
20668   unsigned NumElems = N->getNumOperands();
20669   SDValue In = Op0.getOperand(0).getOperand(0);
20670   EVT InSVT = In.getValueType().getScalarType();
20671   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
20672 
20673   // Don't create an illegal input type after type legalization.
20674   if (LegalTypes && !TLI.isTypeLegal(InVT))
20675     return SDValue();
20676 
20677   // Ensure all the elements come from the same vector and are adjacent.
20678   for (unsigned i = 1; i != NumElems; ++i) {
20679     if ((Offset + i) != checkElem(N->getOperand(i)))
20680       return SDValue();
20681   }
20682 
20683   SDLoc DL(N);
20684   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
20685                    Op0.getOperand(0).getOperand(1));
20686   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
20687                      VT, In);
20688 }
20689 
20690 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
20691   EVT VT = N->getValueType(0);
20692 
20693   // A vector built entirely of undefs is undef.
20694   if (ISD::allOperandsUndef(N))
20695     return DAG.getUNDEF(VT);
20696 
20697   // If this is a splat of a bitcast from another vector, change to a
20698   // concat_vector.
20699   // For example:
20700   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
20701   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
20702   //
20703   // If X is a build_vector itself, the concat can become a larger build_vector.
20704   // TODO: Maybe this is useful for non-splat too?
20705   if (!LegalOperations) {
20706     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20707       Splat = peekThroughBitcasts(Splat);
20708       EVT SrcVT = Splat.getValueType();
20709       if (SrcVT.isVector()) {
20710         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
20711         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
20712                                      SrcVT.getVectorElementType(), NumElts);
20713         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
20714           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
20715           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
20716                                        NewVT, Ops);
20717           return DAG.getBitcast(VT, Concat);
20718         }
20719       }
20720     }
20721   }
20722 
  // Check if we can express BUILD_VECTOR via subvector extract.
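  // For example (hypothetical operands):
  //   (v2i32 build_vector (extractelt (v8i32 X), 4), (extractelt (v8i32 X), 5))
  //   --> (v2i32 extract_subvector X, 4)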
20724   if (!LegalTypes && (N->getNumOperands() > 1)) {
20725     SDValue Op0 = N->getOperand(0);
    // Returns the extract index if Op is an EXTRACT_VECTOR_ELT with a
    // constant index from the same source vector as Op0, or -1 otherwise.
    // Guard against Op0 not being an extract, and use a signed 64-bit
    // return type so that the -1 sentinel is preserved.
    auto checkElem = [&](SDValue Op) -> int64_t {
      if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
          (Op0.getOperand(0) == Op.getOperand(0)))
        if (auto *CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
          return CNode->getZExtValue();
      return -1;
    };

    int64_t Offset = checkElem(Op0);
20735     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
20736       if (Offset + i != checkElem(N->getOperand(i))) {
20737         Offset = -1;
20738         break;
20739       }
20740     }
20741 
20742     if ((Offset == 0) &&
20743         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
20744       return Op0.getOperand(0);
    if ((Offset != -1) &&
        ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // The index must be a multiple of the result's element count.
20748       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
20749                          Op0.getOperand(0), Op0.getOperand(1));
20750   }
20751 
20752   if (SDValue V = convertBuildVecZextToZext(N))
20753     return V;
20754 
20755   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
20756     return V;
20757 
20758   if (SDValue V = reduceBuildVecTruncToBitCast(N))
20759     return V;
20760 
20761   if (SDValue V = reduceBuildVecToShuffle(N))
20762     return V;
20763 
20764   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
20765   // Do this late as some of the above may replace the splat.
20766   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
20767     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
20768       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
20769       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
20770     }
20771 
20772   return SDValue();
20773 }
20774 
20775 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
20776   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20777   EVT OpVT = N->getOperand(0).getValueType();
20778 
20779   // If the operands are legal vectors, leave them alone.
20780   if (TLI.isTypeLegal(OpVT))
20781     return SDValue();
20782 
20783   SDLoc DL(N);
20784   EVT VT = N->getValueType(0);
20785   SmallVector<SDValue, 8> Ops;
20786 
20787   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
20788   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20789 
20790   // Keep track of what we encounter.
20791   bool AnyInteger = false;
20792   bool AnyFP = false;
20793   for (const SDValue &Op : N->ops()) {
20794     if (ISD::BITCAST == Op.getOpcode() &&
20795         !Op.getOperand(0).getValueType().isVector())
20796       Ops.push_back(Op.getOperand(0));
20797     else if (ISD::UNDEF == Op.getOpcode())
20798       Ops.push_back(ScalarUndef);
20799     else
20800       return SDValue();
20801 
20802     // Note whether we encounter an integer or floating point scalar.
20803     // If it's neither, bail out, it could be something weird like x86mmx.
20804     EVT LastOpVT = Ops.back().getValueType();
20805     if (LastOpVT.isFloatingPoint())
20806       AnyFP = true;
20807     else if (LastOpVT.isInteger())
20808       AnyInteger = true;
20809     else
20810       return SDValue();
20811   }
20812 
20813   // If any of the operands is a floating point scalar bitcast to a vector,
20814   // use floating point types throughout, and bitcast everything.
20815   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
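  // For example (hypothetical operands):
  //   concat (v2f32 (bitcast (f64 X))), (v2f32 (bitcast (i64 Y))) -->
  //   (v4f32 (bitcast (v2f64 build_vector (f64 X), (f64 (bitcast Y)))))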
20816   if (AnyFP) {
20817     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
20818     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
20819     if (AnyInteger) {
20820       for (SDValue &Op : Ops) {
20821         if (Op.getValueType() == SVT)
20822           continue;
20823         if (Op.isUndef())
20824           Op = ScalarUndef;
20825         else
20826           Op = DAG.getBitcast(SVT, Op);
20827       }
20828     }
20829   }
20830 
20831   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
20832                                VT.getSizeInBits() / SVT.getSizeInBits());
20833   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
20834 }
20835 
20836 // Attempt to merge nested concat_vectors/undefs.
20837 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
20838 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
20839 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
20840                                                   SelectionDAG &DAG) {
20841   EVT VT = N->getValueType(0);
20842 
20843   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
20844   EVT SubVT;
20845   SDValue FirstConcat;
20846   for (const SDValue &Op : N->ops()) {
20847     if (Op.isUndef())
20848       continue;
20849     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
20850       return SDValue();
20851     if (!FirstConcat) {
20852       SubVT = Op.getOperand(0).getValueType();
20853       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
20854         return SDValue();
20855       FirstConcat = Op;
20856       continue;
20857     }
20858     if (SubVT != Op.getOperand(0).getValueType())
20859       return SDValue();
20860   }
20861   assert(FirstConcat && "Concat of all-undefs found");
20862 
20863   SmallVector<SDValue> ConcatOps;
20864   for (const SDValue &Op : N->ops()) {
20865     if (Op.isUndef()) {
20866       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
20867       continue;
20868     }
20869     ConcatOps.append(Op->op_begin(), Op->op_end());
20870   }
20871   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
20872 }
20873 
20874 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
20875 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
20876 // most two distinct vectors the same size as the result, attempt to turn this
20877 // into a legal shuffle.
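// For example (hypothetical operands):
//   concat (v4i32 extract_subvector (v8i32 A), 4),
//          (v4i32 extract_subvector (v8i32 B), 0) -->
//   (v8i32 vector_shuffle<4,5,6,7,8,9,10,11> A, B)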
20878 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
20879   EVT VT = N->getValueType(0);
20880   EVT OpVT = N->getOperand(0).getValueType();
20881 
20882   // We currently can't generate an appropriate shuffle for a scalable vector.
20883   if (VT.isScalableVector())
20884     return SDValue();
20885 
20886   int NumElts = VT.getVectorNumElements();
20887   int NumOpElts = OpVT.getVectorNumElements();
20888 
20889   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
20890   SmallVector<int, 8> Mask;
20891 
20892   for (SDValue Op : N->ops()) {
20893     Op = peekThroughBitcasts(Op);
20894 
20895     // UNDEF nodes convert to UNDEF shuffle mask values.
20896     if (Op.isUndef()) {
20897       Mask.append((unsigned)NumOpElts, -1);
20898       continue;
20899     }
20900 
20901     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20902       return SDValue();
20903 
20904     // What vector are we extracting the subvector from and at what index?
20905     SDValue ExtVec = Op.getOperand(0);
20906     int ExtIdx = Op.getConstantOperandVal(1);
20907 
20908     // We want the EVT of the original extraction to correctly scale the
20909     // extraction index.
20910     EVT ExtVT = ExtVec.getValueType();
20911     ExtVec = peekThroughBitcasts(ExtVec);
20912 
20913     // UNDEF nodes convert to UNDEF shuffle mask values.
20914     if (ExtVec.isUndef()) {
20915       Mask.append((unsigned)NumOpElts, -1);
20916       continue;
20917     }
20918 
20919     // Ensure that we are extracting a subvector from a vector the same
20920     // size as the result.
20921     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
20922       return SDValue();
20923 
20924     // Scale the subvector index to account for any bitcast.
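    // For example, if the extract was (v2i64 extract_subvector X, 2) but the
    // result mask is built with v4i32 elements, NumExtElts == 2 and
    // NumElts == 4, so ExtIdx is scaled from 2 to 4.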
20925     int NumExtElts = ExtVT.getVectorNumElements();
20926     if (0 == (NumExtElts % NumElts))
20927       ExtIdx /= (NumExtElts / NumElts);
20928     else if (0 == (NumElts % NumExtElts))
20929       ExtIdx *= (NumElts / NumExtElts);
20930     else
20931       return SDValue();
20932 
20933     // At most we can reference 2 inputs in the final shuffle.
20934     if (SV0.isUndef() || SV0 == ExtVec) {
20935       SV0 = ExtVec;
20936       for (int i = 0; i != NumOpElts; ++i)
20937         Mask.push_back(i + ExtIdx);
20938     } else if (SV1.isUndef() || SV1 == ExtVec) {
20939       SV1 = ExtVec;
20940       for (int i = 0; i != NumOpElts; ++i)
20941         Mask.push_back(i + ExtIdx + NumElts);
20942     } else {
20943       return SDValue();
20944     }
20945   }
20946 
20947   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20948   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
20949                                      DAG.getBitcast(VT, SV1), Mask, DAG);
20950 }
20951 
20952 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
20953   unsigned CastOpcode = N->getOperand(0).getOpcode();
20954   switch (CastOpcode) {
20955   case ISD::SINT_TO_FP:
20956   case ISD::UINT_TO_FP:
20957   case ISD::FP_TO_SINT:
20958   case ISD::FP_TO_UINT:
20959     // TODO: Allow more opcodes?
20960     //  case ISD::BITCAST:
20961     //  case ISD::TRUNCATE:
20962     //  case ISD::ZERO_EXTEND:
20963     //  case ISD::SIGN_EXTEND:
20964     //  case ISD::FP_EXTEND:
20965     break;
20966   default:
20967     return SDValue();
20968   }
20969 
20970   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
20971   if (!SrcVT.isVector())
20972     return SDValue();
20973 
20974   // All operands of the concat must be the same kind of cast from the same
20975   // source type.
20976   SmallVector<SDValue, 4> SrcOps;
20977   for (SDValue Op : N->ops()) {
20978     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
20979         Op.getOperand(0).getValueType() != SrcVT)
20980       return SDValue();
20981     SrcOps.push_back(Op.getOperand(0));
20982   }
20983 
  // The wider cast must be supported by the target. This is unusual because
  // the type used to query operation legality depends on the opcode (the
  // source type for int-to-fp, the result type for fp-to-int). In addition,
  // check the other type in the cast to make sure this is really legal.
20987   EVT VT = N->getValueType(0);
20988   EVT SrcEltVT = SrcVT.getVectorElementType();
20989   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
20990   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
20991   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20992   switch (CastOpcode) {
20993   case ISD::SINT_TO_FP:
20994   case ISD::UINT_TO_FP:
20995     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
20996         !TLI.isTypeLegal(VT))
20997       return SDValue();
20998     break;
20999   case ISD::FP_TO_SINT:
21000   case ISD::FP_TO_UINT:
21001     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
21002         !TLI.isTypeLegal(ConcatSrcVT))
21003       return SDValue();
21004     break;
21005   default:
21006     llvm_unreachable("Unexpected cast opcode");
21007   }
21008 
21009   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
21010   SDLoc DL(N);
21011   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
21012   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
21013 }
21014 
21015 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
21016   // If we only have one input vector, we don't need to do any concatenation.
21017   if (N->getNumOperands() == 1)
21018     return N->getOperand(0);
21019 
21020   // Check if all of the operands are undefs.
21021   EVT VT = N->getValueType(0);
21022   if (ISD::allOperandsUndef(N))
21023     return DAG.getUNDEF(VT);
21024 
21025   // Optimize concat_vectors where all but the first of the vectors are undef.
21026   if (all_of(drop_begin(N->ops()),
21027              [](const SDValue &Op) { return Op.isUndef(); })) {
21028     SDValue In = N->getOperand(0);
21029     assert(In.getValueType().isVector() && "Must concat vectors");
21030 
21031     // If the input is a concat_vectors, just make a larger concat by padding
21032     // with smaller undefs.
21033     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
21034       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
21035       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
21036       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
21037       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21038     }
21039 
21040     SDValue Scalar = peekThroughOneUseBitcasts(In);
21041 
21042     // concat_vectors(scalar_to_vector(scalar), undef) ->
21043     //     scalar_to_vector(scalar)
    if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        Scalar.hasOneUse()) {
21046       EVT SVT = Scalar.getValueType().getVectorElementType();
21047       if (SVT == Scalar.getOperand(0).getValueType())
21048         Scalar = Scalar.getOperand(0);
21049     }
21050 
21051     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
21052     if (!Scalar.getValueType().isVector()) {
21053       // If the bitcast type isn't legal, it might be a trunc of a legal type;
21054       // look through the trunc so we can still do the transform:
21055       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
21056       if (Scalar->getOpcode() == ISD::TRUNCATE &&
21057           !TLI.isTypeLegal(Scalar.getValueType()) &&
21058           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
21059         Scalar = Scalar->getOperand(0);
21060 
21061       EVT SclTy = Scalar.getValueType();
21062 
21063       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
21064         return SDValue();
21065 
21066       // Bail out if the vector size is not a multiple of the scalar size.
21067       if (VT.getSizeInBits() % SclTy.getSizeInBits())
21068         return SDValue();
21069 
21070       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
21071       if (VNTNumElms < 2)
21072         return SDValue();
21073 
21074       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
21075       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
21076         return SDValue();
21077 
21078       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
21079       return DAG.getBitcast(VT, Res);
21080     }
21081   }
21082 
21083   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an all-UNDEF concatenation.
21085   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
21086   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
21087   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
21088     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
21089   };
21090   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
21091     SmallVector<SDValue, 8> Opnds;
21092     EVT SVT = VT.getScalarType();
21093 
21094     EVT MinVT = SVT;
21095     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTOR nodes are built from integers, they may have
      // different operand types. Get the smallest type and truncate all
      // operands to it.
21098       bool FoundMinVT = false;
21099       for (const SDValue &Op : N->ops())
21100         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
21101           EVT OpSVT = Op.getOperand(0).getValueType();
21102           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
21103           FoundMinVT = true;
21104         }
21105       assert(FoundMinVT && "Concat vector type mismatch");
21106     }
21107 
21108     for (const SDValue &Op : N->ops()) {
21109       EVT OpVT = Op.getValueType();
21110       unsigned NumElts = OpVT.getVectorNumElements();
21111 
21112       if (ISD::UNDEF == Op.getOpcode())
21113         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
21114 
21115       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
21116         if (SVT.isFloatingPoint()) {
21117           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
21118           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
21119         } else {
21120           for (unsigned i = 0; i != NumElts; ++i)
21121             Opnds.push_back(
21122                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
21123         }
21124       }
21125     }
21126 
21127     assert(VT.getVectorNumElements() == Opnds.size() &&
21128            "Concat vector type mismatch");
21129     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
21130   }
21131 
21132   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
21133   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
21134   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
21135     return V;
21136 
21137   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
21138     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
21139     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
21140       return V;
21141 
21142     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
21143     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
21144       return V;
21145   }
21146 
21147   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
21148     return V;
21149 
  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for a CONCAT operation that places the incoming vectors
  // at the exact same location as in a single source vector.
21154   //
21155   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
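  // For example, concat (extract_subvector V, 0), (extract_subvector V, 4),
  // where V and the result are both v8i32, is a nop and folds to V.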
21156   SDValue SingleSource = SDValue();
21157   unsigned PartNumElem =
21158       N->getOperand(0).getValueType().getVectorMinNumElements();
21159 
21160   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
21161     SDValue Op = N->getOperand(i);
21162 
21163     if (Op.isUndef())
21164       continue;
21165 
21166     // Check if this is the identity extract:
21167     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
21168       return SDValue();
21169 
21170     // Find the single incoming vector for the extract_subvector.
21171     if (SingleSource.getNode()) {
21172       if (Op.getOperand(0) != SingleSource)
21173         return SDValue();
21174     } else {
21175       SingleSource = Op.getOperand(0);
21176 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
21180       if (SingleSource.getValueType() != N->getValueType(0))
21181         return SDValue();
21182     }
21183 
21184     // Check that we are reading from the identity index.
21185     unsigned IdentityIndex = i * PartNumElem;
21186     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
21187       return SDValue();
21188   }
21189 
21190   if (SingleSource.getNode())
21191     return SingleSource;
21192 
21193   return SDValue();
21194 }
21195 
21196 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
21197 // if the subvector can be sourced for free.
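// For example, with SubVT == v4i32 (hypothetical operands):
//   getSubVectorSrc((insert_subvector ?, X, 4), 4, v4i32) --> X
//   getSubVectorSrc((concat_vectors A, B), 4, v4i32)      --> B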
21198 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
21199   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
21200       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
21201     return V.getOperand(1);
21202   }
21203   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21204   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
21205       V.getOperand(0).getValueType() == SubVT &&
21206       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
21207     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
21208     return V.getOperand(SubIdx);
21209   }
21210   return SDValue();
21211 }
21212 
21213 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
21214                                               SelectionDAG &DAG,
21215                                               bool LegalOperations) {
21216   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21217   SDValue BinOp = Extract->getOperand(0);
21218   unsigned BinOpcode = BinOp.getOpcode();
21219   if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
21220     return SDValue();
21221 
21222   EVT VecVT = BinOp.getValueType();
21223   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
21224   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
21225     return SDValue();
21226 
21227   SDValue Index = Extract->getOperand(1);
21228   EVT SubVT = Extract->getValueType(0);
21229   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
21230     return SDValue();
21231 
21232   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
21233   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
21234 
21235   // TODO: We could handle the case where only 1 operand is being inserted by
21236   //       creating an extract of the other operand, but that requires checking
21237   //       number of uses and/or costs.
21238   if (!Sub0 || !Sub1)
21239     return SDValue();
21240 
21241   // We are inserting both operands of the wide binop only to extract back
21242   // to the narrow vector size. Eliminate all of the insert/extract:
21243   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
21244   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
21245                      BinOp->getFlags());
21246 }
21247 
21248 /// If we are extracting a subvector produced by a wide binary operator try
21249 /// to use a narrow binary operator and/or avoid concatenation and extraction.
21250 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
21251                                           bool LegalOperations) {
21252   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
21253   // some of these bailouts with other transforms.
21254 
21255   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
21256     return V;
21257 
21258   // The extract index must be a constant, so we can map it to a concat operand.
21259   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
21260   if (!ExtractIndexC)
21261     return SDValue();
21262 
21263   // We are looking for an optionally bitcasted wide vector binary operator
21264   // feeding an extract subvector.
21265   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21266   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
21267   unsigned BOpcode = BinOp.getOpcode();
21268   if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
21269     return SDValue();
21270 
21271   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
21272   // reduced to the unary fneg when it is visited, and we probably want to deal
21273   // with fneg in a target-specific way.
21274   if (BOpcode == ISD::FSUB) {
21275     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
21276     if (C && C->getValueAPF().isNegZero())
21277       return SDValue();
21278   }
21279 
  // The binop must produce a vector type, so we can extract some fraction
  // of it.
21281   EVT WideBVT = BinOp.getValueType();
21282   // The optimisations below currently assume we are dealing with fixed length
21283   // vectors. It is possible to add support for scalable vectors, but at the
21284   // moment we've done no analysis to prove whether they are profitable or not.
21285   if (!WideBVT.isFixedLengthVector())
21286     return SDValue();
21287 
21288   EVT VT = Extract->getValueType(0);
21289   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
21290   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
21291          "Extract index is not a multiple of the vector length.");
21292 
21293   // Bail out if this is not a proper multiple width extraction.
21294   unsigned WideWidth = WideBVT.getSizeInBits();
21295   unsigned NarrowWidth = VT.getSizeInBits();
21296   if (WideWidth % NarrowWidth != 0)
21297     return SDValue();
21298 
21299   // Bail out if we are extracting a fraction of a single operation. This can
21300   // occur because we potentially looked through a bitcast of the binop.
21301   unsigned NarrowingRatio = WideWidth / NarrowWidth;
21302   unsigned WideNumElts = WideBVT.getVectorNumElements();
21303   if (WideNumElts % NarrowingRatio != 0)
21304     return SDValue();
21305 
21306   // Bail out if the target does not support a narrower version of the binop.
21307   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
21308                                    WideNumElts / NarrowingRatio);
21309   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
21310     return SDValue();
21311 
21312   // If extraction is cheap, we don't need to look at the binop operands
21313   // for concat ops. The narrow binop alone makes this transform profitable.
21314   // We can't just reuse the original extract index operand because we may have
21315   // bitcasted.
21316   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
21317   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
21318   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
21319       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
21320     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
21321     SDLoc DL(Extract);
21322     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21323     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21324                             BinOp.getOperand(0), NewExtIndex);
21325     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21326                             BinOp.getOperand(1), NewExtIndex);
21327     SDValue NarrowBinOp =
21328         DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
21329     return DAG.getBitcast(VT, NarrowBinOp);
21330   }
21331 
21332   // Only handle the case where we are doubling and then halving. A larger ratio
21333   // may require more than two narrow binops to replace the wide binop.
21334   if (NarrowingRatio != 2)
21335     return SDValue();
21336 
21337   // TODO: The motivating case for this transform is an x86 AVX1 target. That
21338   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
21339   // flavors, but no other 256-bit integer support. This could be extended to
21340   // handle any binop, but that may require fixing/adding other folds to avoid
21341   // codegen regressions.
21342   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
21343     return SDValue();
21344 
21345   // We need at least one concatenation operation of a binop operand to make
21346   // this transform worthwhile. The concat must double the input vector sizes.
21347   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
21348     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
21349       return V.getOperand(ConcatOpNum);
21350     return SDValue();
21351   };
21352   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
21353   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
21354 
21355   if (SubVecL || SubVecR) {
21356     // If a binop operand was not the result of a concat, we must extract a
21357     // half-sized operand for our new narrow binop:
21358     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
21359     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
21360     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
21361     SDLoc DL(Extract);
21362     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
21363     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
21364                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21365                                       BinOp.getOperand(0), IndexC);
21366 
21367     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
21368                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
21369                                       BinOp.getOperand(1), IndexC);
21370 
21371     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
21372     return DAG.getBitcast(VT, NarrowBinOp);
21373   }
21374 
21375   return SDValue();
21376 }
21377 
21378 /// If we are extracting a subvector from a wide vector load, convert to a
21379 /// narrow load to eliminate the extraction:
21380 /// (extract_subvector (load wide vector)) --> (load narrow vector)
21381 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
21382   // TODO: Add support for big-endian. The offset calculation must be adjusted.
21383   if (DAG.getDataLayout().isBigEndian())
21384     return SDValue();
21385 
21386   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
21387   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
21388     return SDValue();
21389 
  EVT VT = Extract->getValueType(0);
21392 
21393   // We can only create byte sized loads.
21394   if (!VT.isByteSized())
21395     return SDValue();
21396 
21397   unsigned Index = Extract->getConstantOperandVal(1);
21398   unsigned NumElts = VT.getVectorMinNumElements();
21399 
21400   // The definition of EXTRACT_SUBVECTOR states that the index must be a
21401   // multiple of the minimum number of elements in the result type.
21402   assert(Index % NumElts == 0 && "The extract subvector index is not a "
21403                                  "multiple of the result's element count");
21404 
21405   // It's fine to use TypeSize here as we know the offset will not be negative.
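  // For example, extracting the high v2i64 half (Index == 2) of a v4i64 wide
  // load gives a byte offset of 16, the store size of one v2i64.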
21406   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
21407 
  // Allow targets to opt-out.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
21410     return SDValue();
21411 
21412   // The narrow load will be offset from the base address of the old load if
21413   // we are extracting from something besides index 0 (little-endian).
21414   SDLoc DL(Extract);
21415 
21416   // TODO: Use "BaseIndexOffset" to make this more effective.
21417   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
21418 
21419   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
21420   MachineFunction &MF = DAG.getMachineFunction();
21421   MachineMemOperand *MMO;
21422   if (Offset.isScalable()) {
21423     MachinePointerInfo MPI =
21424         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
21425     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
21426   } else
21427     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
21428                                   StoreSize);
21429 
21430   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
21431   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
21432   return NewLd;
21433 }
21434 
/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
///                               EXTRACT_SUBVECTOR(Op?, ?),
///                               Mask')
/// iff it is legal and profitable to do so. Notably, the trimmed mask
/// (containing only the elements that are extracted)
/// must reference at most two subvectors.
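/// For example (hypothetical operands), extracting the low v4i32 half of
///   (v8i32 vector_shuffle<8,9,0,1,u,u,u,u> A, B)
/// demands only the low halves of B and A, and becomes
///   (v4i32 vector_shuffle<0,1,4,5> (extract_subvector B, 0),
///                                  (extract_subvector A, 0)).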
21442 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
21443                                                      SelectionDAG &DAG,
21444                                                      const TargetLowering &TLI,
21445                                                      bool LegalOperations) {
21446   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21447          "Must only be called on EXTRACT_SUBVECTOR's");
21448 
21449   SDValue N0 = N->getOperand(0);
21450 
21451   // Only deal with non-scalable vectors.
21452   EVT NarrowVT = N->getValueType(0);
21453   EVT WideVT = N0.getValueType();
21454   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
21455     return SDValue();
21456 
21457   // The operand must be a shufflevector.
21458   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
21459   if (!WideShuffleVector)
21460     return SDValue();
21461 
  // The old shuffle needs to go away.
21463   if (!WideShuffleVector->hasOneUse())
21464     return SDValue();
21465 
21466   // And the narrow shufflevector that we'll form must be legal.
21467   if (LegalOperations &&
21468       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
21469     return SDValue();
21470 
21471   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
21472   int NumEltsExtracted = NarrowVT.getVectorNumElements();
21473   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
21474          "Extract index is not a multiple of the output vector length.");
21475 
21476   int WideNumElts = WideVT.getVectorNumElements();
21477 
21478   SmallVector<int, 16> NewMask;
21479   NewMask.reserve(NumEltsExtracted);
21480   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
21481       DemandedSubvectors;
21482 
21483   // Try to decode the wide mask into narrow mask from at most two subvectors.
21484   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
21485                                                   NumEltsExtracted)) {
21486     assert((M >= -1) && (M < (2 * WideNumElts)) &&
21487            "Out-of-bounds shuffle mask?");
21488 
21489     if (M < 0) {
21490       // Does not depend on operands, does not require adjustment.
21491       NewMask.emplace_back(M);
21492       continue;
21493     }
21494 
21495     // From which operand of the shuffle does this shuffle mask element pick?
21496     int WideShufOpIdx = M / WideNumElts;
21497     // Which element of that operand is picked?
21498     int OpEltIdx = M % WideNumElts;
21499 
21500     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
21501            "Shuffle mask vector decomposition failure.");
21502 
21503     // And which NumEltsExtracted-sized subvector of that operand is that?
21504     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
21505     // And which element within that subvector of that operand is that?
21506     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
21507 
21508     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
21509            "Shuffle mask subvector decomposition failure.");
21510 
21511     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
21512             WideShufOpIdx * WideNumElts) == M &&
21513            "Shuffle mask full decomposition failure.");
21514 
21515     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
21516 
21517     if (Op.isUndef()) {
21518       // Picking from an undef operand. Let's adjust mask instead.
21519       NewMask.emplace_back(-1);
21520       continue;
21521     }
21522 
21523     // Profitability check: only deal with extractions from the first subvector.
21524     if (OpSubvecIdx != 0)
21525       return SDValue();
21526 
21527     const std::pair<SDValue, int> DemandedSubvector =
21528         std::make_pair(Op, OpSubvecIdx);
21529 
21530     if (DemandedSubvectors.insert(DemandedSubvector)) {
21531       if (DemandedSubvectors.size() > 2)
21532         return SDValue(); // We can't handle more than two subvectors.
21533       // How many elements into the WideVT does this subvector start?
21534       int Index = NumEltsExtracted * OpSubvecIdx;
21535       // Bail out if the extraction isn't going to be cheap.
21536       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
21537         return SDValue();
21538     }
21539 
21540     // Ok, but from which operand of the new shuffle will this element pick?
21541     int NewOpIdx =
21542         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
21543     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
21544 
21545     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
21546     NewMask.emplace_back(AdjM);
21547   }
21548   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
21549   assert(DemandedSubvectors.size() <= 2 &&
21550          "Should have ended up demanding at most two subvectors.");
21551 
21552   // Did we discover that the shuffle does not actually depend on operands?
21553   if (DemandedSubvectors.empty())
21554     return DAG.getUNDEF(NarrowVT);
21555 
  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
  // operand[s]/index[es], so there is no point in checking its legality.
21558 
21559   // Do not turn a legal shuffle into an illegal one.
21560   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
21561       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
21562     return SDValue();
21563 
21564   SDLoc DL(N);
21565 
21566   SmallVector<SDValue, 2> NewOps;
21567   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
21568            &DemandedSubvector : DemandedSubvectors) {
21569     // How many elements into the WideVT does this subvector start?
21570     int Index = NumEltsExtracted * DemandedSubvector.second;
21571     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
21572     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
21573                                     DemandedSubvector.first, IndexC));
21574   }
21575   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
21576          "Should end up with either one or two ops");
21577 
21578   // If we ended up with only one operand, pad with an undef.
21579   if (NewOps.size() == 1)
21580     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
21581 
21582   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
21583 }
21584 
21585 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
21586   EVT NVT = N->getValueType(0);
21587   SDValue V = N->getOperand(0);
21588   uint64_t ExtIdx = N->getConstantOperandVal(1);
21589 
21590   // Extract from UNDEF is UNDEF.
21591   if (V.isUndef())
21592     return DAG.getUNDEF(NVT);
21593 
21594   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
21595     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
21596       return NarrowLoad;
21597 
21598   // Combine an extract of an extract into a single extract_subvector.
21599   // ext (ext X, C), 0 --> ext X, C
21600   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
21601     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
21602                                     V.getConstantOperandVal(1)) &&
21603         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
21604       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
21605                          V.getOperand(1));
21606     }
21607   }
21608 
21609   // ty1 extract_vector(ty2 splat(V))) -> ty1 splat(V)
21610   if (V.getOpcode() == ISD::SPLAT_VECTOR)
21611     if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
21612       if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
21613         return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
21614 
21615   // Try to move vector bitcast after extract_subv by scaling extraction index:
21616   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
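  // For example (hypothetical types):
  //   (v2i64 extract_subv (v4i64 bitcast (v8i32 X)), 2) -->
  //   (v2i64 bitcast (v4i32 extract_subv X, 4))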
  if (V.getOpcode() == ISD::BITCAST &&
      V.getOperand(0).getValueType().isVector() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
    SDValue SrcOp = V.getOperand(0);
    EVT SrcVT = SrcOp.getValueType();
    unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
    unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
    if ((SrcNumElts % DestNumElts) == 0) {
      unsigned SrcDestRatio = SrcNumElts / DestNumElts;
      ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
      EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
                                      NewExtEC);
      if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
        SDLoc DL(N);
        SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
        SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                                         V.getOperand(0), NewIndex);
        return DAG.getBitcast(NVT, NewExtract);
      }
    }
    if ((DestNumElts % SrcNumElts) == 0) {
      unsigned DestSrcRatio = DestNumElts / SrcNumElts;
      if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
        ElementCount NewExtEC =
            NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
        EVT ScalarVT = SrcVT.getScalarType();
        if ((ExtIdx % DestSrcRatio) == 0) {
          SDLoc DL(N);
          unsigned IndexValScaled = ExtIdx / DestSrcRatio;
          EVT NewExtVT =
              EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
          if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
            SDValue NewExtract =
                DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
                            V.getOperand(0), NewIndex);
            return DAG.getBitcast(NVT, NewExtract);
          }
          if (NewExtEC.isScalar() &&
              TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
            SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
            SDValue NewExtract =
                DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
                            V.getOperand(0), NewIndex);
            return DAG.getBitcast(NVT, NewExtract);
          }
        }
      }
    }
  }

  if (V.getOpcode() == ISD::CONCAT_VECTORS) {
    unsigned ExtNumElts = NVT.getVectorMinNumElements();
    EVT ConcatSrcVT = V.getOperand(0).getValueType();
    assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
           "Concat and extract subvector do not change element type");
    assert((ExtIdx % ExtNumElts) == 0 &&
           "Extract index is not a multiple of the input vector length.");

    unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
    unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;

    // If the concatenated source types match this extract, it's a direct
    // simplification:
    // extract_subvec (concat V1, V2, ...), i --> Vi
    if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
      return V.getOperand(ConcatOpIdx);

    // If the concatenated source vectors are a multiple of the length of this
    // extract, then extract a fraction of one of those source vectors directly
    // from a concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec v8i8 Y, 6
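    // In that example ConcatOpIdx = 14 / 8 = 1 selects Y, and the new index
    // is 14 - 1 * 8 = 6.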
    if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
        ConcatSrcNumElts % ExtNumElts == 0) {
      SDLoc DL(N);
      unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
      assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
             "Trying to extract from >1 concat operand?");
      assert(NewExtIdx % ExtNumElts == 0 &&
             "Extract index is not a multiple of the input vector length.");
      SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
                         V.getOperand(ConcatOpIdx), NewIndexC);
    }
  }

  if (SDValue V =
          foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
    return V;

  V = peekThroughBitcasts(V);

  // If the input is a build vector, try to make a smaller build vector.
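  // E.g. (one illustrative case) v2i32 extract_subvector at index 2 of
  // (v4i32 build_vector a, b, c, d) can become (v2i32 build_vector c, d).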
  if (V.getOpcode() == ISD::BUILD_VECTOR) {
    EVT InVT = V.getValueType();
    unsigned ExtractSize = NVT.getSizeInBits();
    unsigned EltSize = InVT.getScalarSizeInBits();
    // Only do this if we won't split any elements.
    if (ExtractSize % EltSize == 0) {
      unsigned NumElems = ExtractSize / EltSize;
      EVT EltVT = InVT.getVectorElementType();
      EVT ExtractVT =
          NumElems == 1 ? EltVT
                        : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
      if ((Level < AfterLegalizeDAG ||
           (NumElems == 1 ||
            TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
          (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
        unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;

        if (NumElems == 1) {
          SDValue Src = V->getOperand(IdxVal);
          // Truncate to the scalar element type, not the vector type, if the
          // build_vector operand is wider than the element.
          if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
          return DAG.getBitcast(NVT, Src);
        }

        // Extract the pieces from the original build_vector.
        SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
                                              V->ops().slice(IdxVal, NumElems));
        return DAG.getBitcast(NVT, BuildVec);
      }
    }
  }

  if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are the same size.
    EVT SmallVT = V.getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT))
      return SDValue();

    // Combine:
    //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
    // Into:
    //    indices are equal or bit offsets are equal => V2
    //    otherwise => (extract_subvec V1, ExtIdx)
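    // E.g. (illustrative) extracting v2f64 at index 2 from
    // (insert_subvector V1, (v2f64 V2), 2) yields V2; extracting at index 0
    // instead yields (v2f64 extract_subvector V1, 0).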
    uint64_t InsIdx = V.getConstantOperandVal(2);
    if (InsIdx * SmallVT.getScalarSizeInBits() ==
        ExtIdx * NVT.getScalarSizeInBits()) {
      if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
        return SDValue();

      return DAG.getBitcast(NVT, V.getOperand(1));
    }
    return DAG.getNode(
        ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
        DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
        N->getOperand(1));
  }

  if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
    return NarrowBOp;

  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
/// followed by concatenation. Narrow vector ops may have better performance
/// than wide ops, and this can unlock further narrowing of other vector ops.
/// Targets can invert this transform later if it is not profitable.
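/// For example (illustrative): mask <0,1,8,9,2,3,10,11> over v8i32 operands
/// (concat X, undef) and (concat Y, undef) becomes
/// concat (shuffle X, Y, <0,1,4,5>), (shuffle X, Y, <2,3,6,7>).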
static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
                                         SelectionDAG &DAG) {
  SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
  if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
      N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
      !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
    return SDValue();

  // Split the wide shuffle mask into halves. Any mask element that is accessing
  // operand 1 is offset down to account for narrowing of the vectors.
  ArrayRef<int> Mask = Shuf->getMask();
  EVT VT = Shuf->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  unsigned HalfNumElts = NumElts / 2;
  SmallVector<int, 16> Mask0(HalfNumElts, -1);
  SmallVector<int, 16> Mask1(HalfNumElts, -1);
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] == -1)
      continue;
    // If we reference the upper (undef) subvector then the element is undef.
    if ((Mask[i] % NumElts) >= HalfNumElts)
      continue;
    int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
    if (i < HalfNumElts)
      Mask0[i] = M;
    else
      Mask1[i - HalfNumElts] = M;
  }

  // Ask the target if this is a valid transform.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
                                HalfNumElts);
  if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
      !TLI.isShuffleMaskLegal(Mask1, HalfVT))
    return SDValue();

  // shuffle (concat X, undef), (concat Y, undef), Mask -->
  // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
  SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
  SDLoc DL(Shuf);
  SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
  SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
}

// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle then concat.
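// E.g. (illustrative) a v8i32 shuffle of (concat A, B) and (concat C, D)
// with mask <0,1,2,3,12,13,14,15> copies subvectors A and D whole, so it
// can be rebuilt as (concat A, D).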
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
  ArrayRef<int> Mask = SVN->getMask();

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  auto IsUndefMaskElt = [](int i) { return i == -1; };

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
                   IsUndefMaskElt)) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
                              N0.getOperand(1),
                              Mask.slice(0, NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
  for (unsigned I = 0; I != NumConcats; ++I) {
    unsigned Begin = I * NumElemsPerConcat;
    ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);

    // Make sure we're dealing with a copy.
    if (llvm::all_of(SubMask, IsUndefMaskElt)) {
      Ops.push_back(DAG.getUNDEF(ConcatVT));
      continue;
    }

    int OpIdx = -1;
    for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
      if (IsUndefMaskElt(SubMask[i]))
        continue;
      if ((SubMask[i] % (int)NumElemsPerConcat) != i)
        return SDValue();
      int EltOpIdx = SubMask[i] / NumElemsPerConcat;
      if (0 <= OpIdx && EltOpIdx != OpIdx)
        return SDValue();
      OpIdx = EltOpIdx;
    }
    assert(0 <= OpIdx && "Unknown concat_vectors op");

    if (OpIdx < (int)N0.getNumOperands())
      Ops.push_back(N0.getOperand(OpIdx));
    else
      Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
//
// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
// a simplification in some sense, but it isn't appropriate in general: some
// BUILD_VECTORs are substantially cheaper than others. The general case
// of a BUILD_VECTOR requires inserting each element individually (or
// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
// all constants is a single constant pool load. A BUILD_VECTOR where each
// element is identical is a splat. A BUILD_VECTOR where most of the operands
// are undef lowers to a small number of element insertions.
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
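// E.g. (one illustrative case) shuffle (build_vector a, b),
// (build_vector c, d), <0,3> can fold to (build_vector a, d) when the
// heuristics above allow it.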
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI) {
  EVT VT = SVN->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
  SDValue N0 = SVN->getOperand(0);
  SDValue N1 = SVN->getOperand(1);

  if (!N0->hasOneUse())
    return SDValue();

  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
  if (!N1.isUndef()) {
    if (!N1->hasOneUse())
      return SDValue();

    bool N0AnyConst = isAnyConstantBuildVector(N0);
    bool N1AnyConst = isAnyConstantBuildVector(N1);
    if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
      return SDValue();
    if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
      return SDValue();
  }

  // If both inputs are splats of the same value then we can safely merge this
  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
  bool IsSplat = false;
  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
  if (BV0 && BV1)
    if (SDValue Splat0 = BV0->getSplatValue())
      IsSplat = (Splat0 == BV1->getSplatValue());

  SmallVector<SDValue, 8> Ops;
  SmallSet<SDValue, 16> DuplicateOps;
  for (int M : SVN->getMask()) {
    SDValue Op = DAG.getUNDEF(VT.getScalarType());
    if (M >= 0) {
      int Idx = M < (int)NumElts ? M : M - NumElts;
      SDValue &S = (M < (int)NumElts ? N0 : N1);
      if (S.getOpcode() == ISD::BUILD_VECTOR) {
        Op = S.getOperand(Idx);
      } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
        SDValue Op0 = S.getOperand(0);
        Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
      } else {
        // Operand can't be combined - bail out.
        return SDValue();
      }
    }

    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
    // generating a splat; semantically, this is fine, but it's likely to
    // generate low-quality code if the target can't reconstruct an appropriate
    // shuffle.
    if (!Op.isUndef() && !isIntOrFPConstant(Op))
      if (!IsSplat && !DuplicateOps.insert(Op).second)
        return SDValue();

    Ops.push_back(Op);
  }

  // BUILD_VECTOR requires all inputs to be of the same type; find the
  // maximum type and extend them all.
  EVT SVT = VT.getScalarType();
  if (SVT.isInteger())
    for (SDValue &Op : Ops)
      SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
  if (SVT != VT.getScalarType())
    for (SDValue &Op : Ops)
      Op = Op.isUndef() ? DAG.getUNDEF(SVT)
                        : (TLI.isZExtFree(Op.getValueType(), SVT)
                               ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
                               : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
}

// Match shuffles that can be converted to any_vector_extend_in_reg.
// This is often generated during legalization.
// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
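// For the example above, the match uses Scale = 2: every mask element at a
// position i that is a multiple of 2 must equal i/2 (or be undef), and all
// other positions must be undef.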
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
                                            SelectionDAG &DAG,
                                            const TargetLowering &TLI,
                                            bool LegalOperations) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  ArrayRef<int> Mask = SVN->getMask();
  SDValue N0 = SVN->getOperand(0);

  // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
  auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
        continue;
      return false;
    }
    return true;
  };

  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
  // power-of-2 extensions, as they are the most likely.
  for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non-power-of-2 vector sizes.
    if (NumElts % Scale != 0)
      continue;
    if (!isAnyExtend(Scale))
      continue;

    EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
    EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
    // Never create an illegal type. Only create unsupported operations if we
    // are pre-legalization.
    if (TLI.isTypeLegal(OutVT))
      if (!LegalOperations ||
          TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
        return DAG.getBitcast(VT,
                              DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
                                          SDLoc(SVN), OutVT, N0));
  }

  return SDValue();
}

// Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
// each source element of a large type into the lowest elements of a smaller
// destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
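// E.g. (illustrative) a v4i32 shuffle<0,2,-1,-1> of
// (v4i32 bitcast (v2i64 any_extend_vector_inreg (v4i32 X))) reads back the
// low i32 of each extended element, so the whole sequence folds to X.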
static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG) {
  EVT VT = SVN->getValueType(0);
  bool IsBigEndian = DAG.getDataLayout().isBigEndian();

  // TODO Add support for big-endian when we have a test case.
  if (!VT.isInteger() || IsBigEndian)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));

  unsigned Opcode = N0.getOpcode();
  if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
      Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
      Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  ArrayRef<int> Mask = SVN->getMask();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
  unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();

  if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
    return SDValue();
  unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;

  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
  // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
  // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
  auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
    for (unsigned i = 0; i != NumElts; ++i) {
      if (Mask[i] < 0)
        continue;
      if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
        continue;
      return false;
    }
    return true;
  };

  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as cases arise.
  if (EltSizeInBits != ExtSrcSizeInBits)
    return SDValue();

  // We can remove *extend_vector_inreg only if the truncation happens at
  // the same scale as the extension.
  if (isTruncate(ExtScale))
    return DAG.getBitcast(VT, N00);

  return SDValue();
}

// Combine shuffles of splat-shuffles of the form:
// shuffle (shuffle V, undef, splat-mask), undef, M
// If splat-mask contains undef elements, we need to be careful about
// introducing undef's in the folded mask which are not the result of composing
// the masks of the shuffles.
static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
                                        SelectionDAG &DAG) {
  if (!Shuf->getOperand(1).isUndef())
    return SDValue();

  // If the inner operand is a known splat with no undefs, just return that
  // directly.
  // TODO: Create DemandedElts mask from Shuf's mask.
  // TODO: Allow undef elements and merge with the shuffle code below.
  if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
    return Shuf->getOperand(0);


  auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Splat || !Splat->isSplat())
    return SDValue();

  ArrayRef<int> ShufMask = Shuf->getMask();
  ArrayRef<int> SplatMask = Splat->getMask();
  assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");

  // Prefer simplifying to the splat-shuffle, if possible. This is legal if
  // every undef mask element in the splat-shuffle has a corresponding undef
  // element in the user-shuffle's mask or if the composition of mask elements
  // would result in undef.
  // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
  // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing to the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
  // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
  //   In this case the composition of masks yields SplatMask, so it's ok to
  //   simplify to the splat-shuffle.
  // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
  //   In this case the composed mask includes all undef elements of SplatMask
  //   and in addition sets element zero to undef. It is safe to simplify to
  //   the splat-shuffle.
  auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
                                       ArrayRef<int> SplatMask) {
    for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
      if (UserMask[i] != -1 && SplatMask[i] == -1 &&
          SplatMask[UserMask[i]] != -1)
        return false;
    return true;
  };
  if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
    return Shuf->getOperand(0);

  // Create a new shuffle with a mask that is composed of the two shuffles'
  // masks.
  SmallVector<int, 32> NewMask;
  for (int Idx : ShufMask)
    NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);

  return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
                              Splat->getOperand(0), Splat->getOperand(1),
                              NewMask);
}

// Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
// the mask can be treated as operating on the larger type.
static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
                                       SelectionDAG &DAG,
                                       const TargetLowering &TLI,
                                       bool LegalOperations) {
  SDValue Op0 = SVN->getOperand(0);
  SDValue Op1 = SVN->getOperand(1);
  EVT VT = SVN->getValueType(0);
  if (Op0.getOpcode() != ISD::BITCAST)
    return SDValue();
  EVT InVT = Op0.getOperand(0).getValueType();
  if (!InVT.isVector() ||
      (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
                          Op1.getOperand(0).getValueType() != InVT)))
    return SDValue();
  if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
      (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
    return SDValue();

  int VTLanes = VT.getVectorNumElements();
  int InLanes = InVT.getVectorNumElements();
  if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
      (LegalOperations &&
       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
    return SDValue();
  int Factor = VTLanes / InLanes;

  // Check that each group of lanes in the mask is either all undef or makes a
  // valid mask for the wider lane type.
  ArrayRef<int> Mask = SVN->getMask();
  SmallVector<int> NewMask;
  if (!widenShuffleMaskElts(Factor, Mask, NewMask))
    return SDValue();

  if (!TLI.isShuffleMaskLegal(NewMask, InVT))
    return SDValue();

  // Create the new shuffle with the new mask and bitcast it back to the
  // original type.
  SDLoc DL(SVN);
  Op0 = Op0.getOperand(0);
  Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
  SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
  return DAG.getBitcast(VT, NewShuf);
}

/// Combine shuffle of shuffle of the form:
/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
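/// E.g. (illustrative) OuterMask <1,-1,3,0> composed with InnerMask
/// <2,2,-1,2> reads source element 2 in every defined lane, giving the
/// combined splat mask <2,-1,2,2>.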
static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
                                     SelectionDAG &DAG) {
  if (!OuterShuf->getOperand(1).isUndef())
    return SDValue();
  auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
  if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
    return SDValue();

  ArrayRef<int> OuterMask = OuterShuf->getMask();
  ArrayRef<int> InnerMask = InnerShuf->getMask();
  unsigned NumElts = OuterMask.size();
  assert(NumElts == InnerMask.size() && "Mask length mismatch");
  SmallVector<int, 32> CombinedMask(NumElts, -1);
  int SplatIndex = -1;
  for (unsigned i = 0; i != NumElts; ++i) {
    // Undef lanes remain undef.
    int OuterMaskElt = OuterMask[i];
    if (OuterMaskElt == -1)
      continue;

    // Peek through the shuffle masks to get the underlying source element.
    int InnerMaskElt = InnerMask[OuterMaskElt];
    if (InnerMaskElt == -1)
      continue;

    // Initialize the splatted element.
    if (SplatIndex == -1)
      SplatIndex = InnerMaskElt;

    // Non-matching index - this is not a splat.
    if (SplatIndex != InnerMaskElt)
      return SDValue();

    CombinedMask[i] = InnerMaskElt;
  }
  assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
          getSplatIndex(CombinedMask) != -1) &&
         "Expected a splat mask");

  // TODO: The transform may be a win even if the mask is not legal.
  EVT VT = OuterShuf->getValueType(0);
  assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
    return SDValue();

  return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
                              InnerShuf->getOperand(1), CombinedMask);
}

/// If the shuffle mask is taking exactly one element from the first vector
/// operand and passing through all other elements from the second vector
/// operand, return the index of the mask element that is choosing an element
/// from the first operand. Otherwise, return -1.
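/// E.g. (illustrative) over 4-element operands, mask <4,5,0,7> returns 2:
/// lane 2 takes element 0 of operand 0, and every other lane passes through
/// the corresponding element of operand 1.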
static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
  int MaskSize = Mask.size();
  int EltFromOp0 = -1;
  // TODO: This does not match if there are undef elements in the shuffle mask.
  // Should we ignore undefs in the shuffle mask instead? The trade-off is
  // removing an instruction (a shuffle), but losing the knowledge that some
  // vector lanes are not needed.
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      // We're looking for a shuffle of exactly one element from operand 0.
      if (EltFromOp0 != -1)
        return -1;
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      // Nothing from operand 1 can change lanes.
      return -1;
    }
  }
  return EltFromOp0;
}

/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
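/// E.g. (illustrative) shuffle (insertelt V1, x, 0), V2, <4,5,0,7> can
/// become insertelt V2, x, 2, since only lane 2 of the result reads from
/// the first operand.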
static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                                      SelectionDAG &DAG) {
  // First, check if we are taking one element of a vector and shuffling that
  // element into another vector.
  ArrayRef<int> Mask = Shuf->getMask();
  SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
  SDValue Op0 = Shuf->getOperand(0);
  SDValue Op1 = Shuf->getOperand(1);
  int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
  if (ShufOp0Index == -1) {
    // Commute mask and check again.
    ShuffleVectorSDNode::commuteMask(CommutedMask);
    ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
    if (ShufOp0Index == -1)
      return SDValue();
    // Commute operands to match the commuted shuffle mask.
    std::swap(Op0, Op1);
    Mask = CommutedMask;
  }

  // The shuffle inserts exactly one element from operand 0 into operand 1.
  // Now see if we can access that element as a scalar via a real insert element
  // instruction.
  // TODO: We can try harder to locate the element as a scalar. Examples: it
  // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
  assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
         "Shuffle mask value must be from operand 0");
  if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
    return SDValue();

  auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
  if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
    return SDValue();

  // There's an existing insertelement with constant insertion index, so we
  // don't need to check the legality/profitability of a replacement operation
  // that differs at most in the constant value. The target should be able to
  // lower any of those in a similar way. If not, legalization will expand this
  // to a scalar-to-vector plus shuffle.
  //
  // Note that the shuffle may move the scalar from the position that the insert
  // element used. Therefore, our new insert element occurs at the shuffle's
  // mask index value, not the insert's index value.
  // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
  SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
                     Op1, Op0.getOperand(1), NewInsIndex);
}

/// If we have a unary shuffle of a shuffle, see if it can be folded away
/// completely. This has the potential to lose undef knowledge because the first
/// shuffle may not have an undef mask element where the second one does. So
/// only call this after doing simplifications based on demanded elements.
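/// E.g. (illustrative) outer mask <1,0,3,2> over inner mask <0,0,2,2> picks
/// the same source element in every lane (Mask0[Mask[i]] == Mask0[i]), so
/// the outer shuffle can be replaced by the inner shuffle itself.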
static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
  // shuf (shuf0 X, Y, Mask0), undef, Mask
  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
    return SDValue();

  ArrayRef<int> Mask = Shuf->getMask();
  ArrayRef<int> Mask0 = Shuf0->getMask();
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    // Ignore undef elements.
    if (Mask[i] == -1)
      continue;
    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");

    // Is the element of the shuffle operand chosen by this shuffle the same as
    // the element chosen by the shuffle operand itself?
    if (Mask0[Mask[i]] != Mask0[i])
      return SDValue();
  }
  // Every element of this shuffle is identical to the result of the previous
  // shuffle, so we can replace this value.
  return Shuf->getOperand(0);
}

SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.isUndef() && N1.isUndef())
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1)
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
                                createUnaryMask(SVN->getMask(), NumElts));

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.isUndef())
    return DAG.getCommutedVectorShuffle(*SVN);

  // Remove references to rhs if it is undef
  if (N1.isUndef()) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
  }

  if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
    return InsElt;

  // A shuffle of a single vector that is a splatted value can always be folded.
  if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
    return V;

  if (SDValue V = formSplatFromShuffles(SVN, DAG))
    return V;

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    int SplatIndex = SVN->getSplatIndex();
    if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
        TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
      // splat (vector_bo L, R), Index -->
      // splat (scalar_bo (extelt L, Index), (extelt R, Index))
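      // E.g. (illustrative) splatting lane 1 of (add L, R) becomes a splat
      // (via scalar_to_vector and an all-zero shuffle mask) of
      // (add (extelt L, 1), (extelt R, 1)).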
      SDValue L = N0.getOperand(0), R = N0.getOperand(1);
      SDLoc DL(N);
      EVT EltVT = VT.getScalarType();
      SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
      SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
      SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
      SDValue NewBO =
          DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
      SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
      SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
      return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
    }

    // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
    // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
    if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
        N0.hasOneUse()) {
      if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
        return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));

      if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
        if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
          if (Idx->getAPIntValue() == SplatIndex)
            return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
    }

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
    SDNode *V = N0.getNode();
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (!V->getOperand(i).isUndef()) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;

      // Canonicalize any other splat as a build_vector.
      SDValue Splatted = V->getOperand(SplatIndex);
      SmallVector<SDValue, 8> Ops(NumElts, Splatted);
      SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

      // We may have jumped through bitcasts, so the type of the
      // BUILD_VECTOR may not match the type of the shuffle.
      if (V->getValueType(0) != VT)
        NewBV = DAG.getBitcast(VT, NewBV);
      return NewBV;
    }
  }

  // Simplify source operands based on shuffle mask.
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);

  // This is intentionally placed after demanded elements simplification
  // because it could eliminate knowledge of undef elements created by this
  // shuffle.
  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
    return ShufOp;

  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;

  // Combine "truncate_vector_in_reg" style shuffles.
  if (SDValue V = combineTruncationShuffle(SVN, DAG))
    return V;

  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.isUndef() ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    if (SDValue V = partitionShuffleOfConcats(N, DAG))
      return V;
  }

  // A shuffle of a concat of the same narrow vector can be reduced to use
  // only low-half elements of a concat with undef:
  // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
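  // E.g. (illustrative) with a 4-element result, mask <3,0,2,1> rewrites to
  // mask <1,0,0,1> over (concat X, undef), since the high half duplicates
  // the low half.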
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
      N0.getNumOperands() == 2 &&
      N0.getOperand(0) == N0.getOperand(1)) {
    int HalfNumElts = (int)NumElts / 2;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= HalfNumElts) {
        assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
        Idx -= HalfNumElts;
      }
      NewMask.push_back(Idx);
    }
    if (TLI.isShuffleMaskLegal(NewMask, VT)) {
      SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
      SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
                                   N0.getOperand(0), UndefVec);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
    }
  }

  // See if we can replace a shuffle with an insert_subvector.
  // e.g. v2i32 into v8i32:
  // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
  // --> insert_subvector(lhs,rhs1,4).
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
      TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
    auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
      // Ensure RHS subvectors are legal.
      assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
      EVT SubVT = RHS.getOperand(0).getValueType();
      int NumSubVecs = RHS.getNumOperands();
      int NumSubElts = SubVT.getVectorNumElements();
      assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
      if (!TLI.isTypeLegal(SubVT))
        return SDValue();

      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
      if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
        return SDValue();

      // Search [NumSubElts] spans for RHS sequence.
      // TODO: Can we avoid nested loops to increase performance?
      SmallVector<int> InsertionMask(NumElts);
      for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
        for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
          // Reset mask to identity.
          std::iota(InsertionMask.begin(), InsertionMask.end(), 0);

          // Add subvector insertion.
          std::iota(InsertionMask.begin() + SubIdx,
                    InsertionMask.begin() + SubIdx + NumSubElts,
                    NumElts + (SubVec * NumSubElts));

          // See if the shuffle mask matches the reference insertion mask.
          bool MatchingShuffle = true;
          for (int i = 0; i != (int)NumElts; ++i) {
            int ExpectIdx = InsertionMask[i];
            int ActualIdx = Mask[i];
            if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
              MatchingShuffle = false;
              break;
            }
          }

          if (MatchingShuffle)
            return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
                               RHS.getOperand(SubVec),
                               DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
        }
      }
      return SDValue();
    };
    ArrayRef<int> Mask = SVN->getMask();
    if (N1.getOpcode() == ISD::CONCAT_VECTORS)
      if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
        return InsertN1;
    if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
      SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
      ShuffleVectorSDNode::commuteMask(CommuteMask);
      if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
        return InsertN0;
    }
  }

22614 
22615   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
22616   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
22617   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
22618     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
22619       return Res;
22620 
22621   // If this shuffle only has a single input that is a bitcasted shuffle,
22622   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
22623   // back to their original types.
22624   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
22625       N1.isUndef() && Level < AfterLegalizeVectorOps &&
22626       TLI.isTypeLegal(VT)) {
22627 
22628     SDValue BC0 = peekThroughOneUseBitcasts(N0);
22629     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
22630       EVT SVT = VT.getScalarType();
22631       EVT InnerVT = BC0->getValueType(0);
22632       EVT InnerSVT = InnerVT.getScalarType();
22633 
22634       // Determine which shuffle works with the smaller scalar type.
22635       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
22636       EVT ScaleSVT = ScaleVT.getScalarType();
22637 
22638       if (TLI.isTypeLegal(ScaleVT) &&
22639           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
22640           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
22641         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22642         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
22643 
22644         // Scale the shuffle masks to the smaller scalar type.
22645         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
22646         SmallVector<int, 8> InnerMask;
22647         SmallVector<int, 8> OuterMask;
22648         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
22649         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
22650 
22651         // Merge the shuffle masks.
22652         SmallVector<int, 8> NewMask;
22653         for (int M : OuterMask)
22654           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
22655 
22656         // Test for shuffle mask legality over both commutations.
22657         SDValue SV0 = BC0->getOperand(0);
22658         SDValue SV1 = BC0->getOperand(1);
22659         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22660         if (!LegalMask) {
22661           std::swap(SV0, SV1);
22662           ShuffleVectorSDNode::commuteMask(NewMask);
22663           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
22664         }
22665 
22666         if (LegalMask) {
22667           SV0 = DAG.getBitcast(ScaleVT, SV0);
22668           SV1 = DAG.getBitcast(ScaleVT, SV1);
22669           return DAG.getBitcast(
22670               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
22671         }
22672       }
22673     }
22674   }
22675 
22676   // Match shuffles of bitcasts, so long as the mask can be treated as the
22677   // larger type.
22678   if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
22679     return V;
22680 
22681   // Compute the combined shuffle mask for a shuffle with SV0 as the first
22682   // operand, and SV1 as the second operand.
22683   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
22684   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
  auto MergeInnerShuffle =
      [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
                     ShuffleVectorSDNode *OtherSVN, SDValue N1,
                     const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
                     SmallVectorImpl<int> &Mask) -> bool {
    // Don't try to fold splats; they're likely to simplify somehow, or they
    // might be free.
    if (OtherSVN->isSplat())
      return false;

    SV0 = SV1 = SDValue();
    Mask.clear();

    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx < 0) {
        // Propagate Undef.
        Mask.push_back(Idx);
        continue;
      }

      if (Commute)
        Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);

      SDValue CurrentVec;
      if (Idx < (int)NumElts) {
        // This shuffle index refers to the inner shuffle N0. Lookup the inner
        // shuffle mask to identify which vector is actually referenced.
        Idx = OtherSVN->getMaskElt(Idx);
        if (Idx < 0) {
          // Propagate Undef.
          Mask.push_back(Idx);
          continue;
        }
        CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
                                          : OtherSVN->getOperand(1);
      } else {
        // This shuffle index references an element within N1.
        CurrentVec = N1;
      }

      // Simple case where 'CurrentVec' is UNDEF.
      if (CurrentVec.isUndef()) {
        Mask.push_back(-1);
        continue;
      }

      // Canonicalize the shuffle index. We don't know yet if CurrentVec
      // will be the first or second operand of the combined shuffle.
      Idx = Idx % NumElts;
      if (!SV0.getNode() || SV0 == CurrentVec) {
        // Ok. CurrentVec is the left hand side.
        // Update the mask accordingly.
        SV0 = CurrentVec;
        Mask.push_back(Idx);
        continue;
      }
      if (!SV1.getNode() || SV1 == CurrentVec) {
        // Ok. CurrentVec is the right hand side.
        // Update the mask accordingly.
        SV1 = CurrentVec;
        Mask.push_back(Idx + NumElts);
        continue;
      }

      // Last chance - see if the vector is another shuffle and if it
      // uses one of the existing candidate shuffle ops.
      if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
        int InnerIdx = CurrentSVN->getMaskElt(Idx);
        if (InnerIdx < 0) {
          Mask.push_back(-1);
          continue;
        }
        SDValue InnerVec = (InnerIdx < (int)NumElts)
                               ? CurrentSVN->getOperand(0)
                               : CurrentSVN->getOperand(1);
        if (InnerVec.isUndef()) {
          Mask.push_back(-1);
          continue;
        }
        InnerIdx %= NumElts;
        if (InnerVec == SV0) {
          Mask.push_back(InnerIdx);
          continue;
        }
        if (InnerVec == SV1) {
          Mask.push_back(InnerIdx + NumElts);
          continue;
        }
      }

      // Bail out if we cannot convert the shuffle pair into a single shuffle.
      return false;
    }

    if (llvm::all_of(Mask, [](int M) { return M < 0; }))
      return true;

    // Avoid introducing shuffles with illegal mask.
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
    if (TLI.isShuffleMaskLegal(Mask, VT))
      return true;

    std::swap(SV0, SV1);
    ShuffleVectorSDNode::commuteMask(Mask);
    return TLI.isShuffleMaskLegal(Mask, VT);
  };

  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
    // Canonicalize shuffles according to rules:
    //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
    //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
    //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
    if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
        N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
      // The incoming shuffle must be of the same type as the result of the
      // current shuffle.
      assert(N1->getOperand(0).getValueType() == VT &&
             "Shuffle types don't match");

      SDValue SV0 = N1->getOperand(0);
      SDValue SV1 = N1->getOperand(1);
      bool HasSameOp0 = N0 == SV0;
      bool IsSV1Undef = SV1.isUndef();
      if (HasSameOp0 || IsSV1Undef || N0 == SV1)
        // Commute the operands of this shuffle so merging below will trigger.
        return DAG.getCommutedVectorShuffle(*SVN);
    }

    // Canonicalize splat shuffles to the RHS to improve merging below.
    //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
    if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
        N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
        cast<ShuffleVectorSDNode>(N0)->isSplat() &&
        !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
      return DAG.getCommutedVectorShuffle(*SVN);
    }

    // Try to fold according to rules:
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
    //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
    // Don't try to fold shuffles with illegal type.
    // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
    for (int i = 0; i != 2; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
          N->isOnlyUserOf(N->getOperand(i).getNode())) {
        // The incoming shuffle must be of the same type as the result of the
        // current shuffle.
        auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
        assert(OtherSV->getOperand(0).getValueType() == VT &&
               "Shuffle types don't match");

        SDValue SV0, SV1;
        SmallVector<int, 4> Mask;
        if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
                              SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
          if (llvm::all_of(Mask, [](int M) { return M < 0; }))
            return DAG.getUNDEF(VT);

          return DAG.getVectorShuffle(VT, SDLoc(N),
                                      SV0 ? SV0 : DAG.getUNDEF(VT),
                                      SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
        }
      }
    }

    // Merge shuffles through binops if we are able to merge them with at
    // least one other shuffle.
    // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
    // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
    unsigned SrcOpcode = N0.getOpcode();
    if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
        (N1.isUndef() ||
         (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
      // Get binop source ops, or just pass on the undef.
      SDValue Op00 = N0.getOperand(0);
      SDValue Op01 = N0.getOperand(1);
      SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
      SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
      // TODO: We might be able to relax the VT check but we don't currently
      // have any isBinOp() that has different result/ops VTs so play safe until
      // we have test coverage.
      if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
          Op01.getValueType() == VT && Op11.getValueType() == VT &&
          (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
           Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
        auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
                                        SmallVectorImpl<int> &Mask, bool LeftOp,
                                        bool Commute) {
          SDValue InnerN = Commute ? N1 : N0;
          SDValue Op0 = LeftOp ? Op00 : Op01;
          SDValue Op1 = LeftOp ? Op10 : Op11;
          if (Commute)
            std::swap(Op0, Op1);
          // Only accept the merged shuffle if we don't introduce undef elements,
          // or the inner shuffle already contained undef elements.
          auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
          return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
                 MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
                                   Mask) &&
                 (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
                  llvm::none_of(Mask, [](int M) { return M < 0; }));
        };
22898 
22899         // Ensure we don't increase the number of shuffles - we must merge a
22900         // shuffle from at least one of the LHS and RHS ops.
22901         bool MergedLeft = false;
22902         SDValue LeftSV0, LeftSV1;
22903         SmallVector<int, 4> LeftMask;
22904         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
22905             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
22906           MergedLeft = true;
22907         } else {
22908           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22909           LeftSV0 = Op00, LeftSV1 = Op10;
22910         }
22911 
22912         bool MergedRight = false;
22913         SDValue RightSV0, RightSV1;
22914         SmallVector<int, 4> RightMask;
22915         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
22916             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
22917           MergedRight = true;
22918         } else {
22919           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
22920           RightSV0 = Op01, RightSV1 = Op11;
22921         }
22922 
22923         if (MergedLeft || MergedRight) {
22924           SDLoc DL(N);
22925           SDValue LHS = DAG.getVectorShuffle(
22926               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
22927               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
22928           SDValue RHS = DAG.getVectorShuffle(
22929               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
22930               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
22931           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
22932         }
22933       }
22934     }
22935   }
22936 
22937   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
22938     return V;
22939 
22940   return SDValue();
22941 }
22942 
22943 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
22944   SDValue InVal = N->getOperand(0);
22945   EVT VT = N->getValueType(0);
22946 
22947   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
22948   // with a VECTOR_SHUFFLE and possible truncate.
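        // For example (illustrative), with V : v4i32:
        //   (v4i32 scalar_to_vector (i32 extract_vector_elt V, 2))
        //     -> (vector_shuffle V, undef, <2,-1,-1,-1>)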
22949   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22950       VT.isFixedLengthVector() &&
22951       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
22952     SDValue InVec = InVal->getOperand(0);
22953     SDValue EltNo = InVal->getOperand(1);
22954     auto InVecT = InVec.getValueType();
22955     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
22956       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
22957       int Elt = C0->getZExtValue();
22958       NewMask[0] = Elt;
22959       // If we have an implicit truncate, do the truncate here as long as the
22960       // truncated scalar type is legal; otherwise we skip this fold.
22961       if (VT.getScalarType() != InVal.getValueType() &&
22962           InVal.getValueType().isScalarInteger() &&
22963           isTypeLegal(VT.getScalarType())) {
22964         SDValue Val =
22965             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
22966         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
22967       }
22968       if (VT.getScalarType() == InVecT.getScalarType() &&
22969           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
22970         SDValue LegalShuffle =
22971           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
22972                                       DAG.getUNDEF(InVecT), NewMask, DAG);
22973         if (LegalShuffle) {
22974           // If the initial vector is the correct size this shuffle is a
22975           // valid result.
22976           if (VT == InVecT)
22977             return LegalShuffle;
22978           // If not we must truncate the vector.
22979           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
22980             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
22981             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
22982                                          InVecT.getVectorElementType(),
22983                                          VT.getVectorNumElements());
22984             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
22985                                LegalShuffle, ZeroIdx);
22986           }
22987         }
22988       }
22989     }
22990   }
22991 
22992   return SDValue();
22993 }
22994 
22995 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
22996   EVT VT = N->getValueType(0);
22997   SDValue N0 = N->getOperand(0);
22998   SDValue N1 = N->getOperand(1);
22999   SDValue N2 = N->getOperand(2);
23000   uint64_t InsIdx = N->getConstantOperandVal(2);
23001 
23002   // If inserting an UNDEF, just return the original vector.
23003   if (N1.isUndef())
23004     return N0;
23005 
23006   // If this is an insert of an extracted vector into an undef vector, we can
23007   // just use the input to the extract.
23008   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
23009       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
23010     return N1.getOperand(0);
23011 
23012   // Simplify scalar inserts into an undef vector:
23013   // insert_subvector undef, (splat X), N2 -> splat X
23014   if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
23015     return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
23016 
23017   // If we are inserting a bitcast value into an undef, with the same
23018   // number of elements, just use the bitcast input of the extract.
23019   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
23020   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
23021   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
23022       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
23023       N1.getOperand(0).getOperand(1) == N2 &&
23024       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
23025           VT.getVectorElementCount() &&
23026       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
23027           VT.getSizeInBits()) {
23028     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
23029   }
23030 
23031   // If both N0 and N1 are bitcast values on which insert_subvector
23032   // would make sense, pull the bitcast through.
23033   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
23034   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
23035   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
23036     SDValue CN0 = N0.getOperand(0);
23037     SDValue CN1 = N1.getOperand(0);
23038     EVT CN0VT = CN0.getValueType();
23039     EVT CN1VT = CN1.getValueType();
23040     if (CN0VT.isVector() && CN1VT.isVector() &&
23041         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
23042         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
23043       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
23044                                       CN0.getValueType(), CN0, CN1, N2);
23045       return DAG.getBitcast(VT, NewINSERT);
23046     }
23047   }
23048 
23049   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
23050   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
23051   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
23052   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
23053       N0.getOperand(1).getValueType() == N1.getValueType() &&
23054       N0.getOperand(2) == N2)
23055     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
23056                        N1, N2);
23057 
23058   // Eliminate an intermediate insert into an undef vector:
23059   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
23060   // insert_subvector undef, X, N2
23061   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
23062       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
23063     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
23064                        N1.getOperand(1), N2);
23065 
23066   // Push subvector bitcasts to the output, adjusting the index as we go.
23067   // insert_subvector(bitcast(v), bitcast(s), c1)
23068   // -> bitcast(insert_subvector(v, s, c2))
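        // For example (illustrative), with X : v2i64 and Y : v1i64:
        //   insert_subvector (v4i32 bitcast X), (v2i32 bitcast Y), 2
        //     -> v4i32 bitcast (insert_subvector X, Y, 1)
        // The i64 source elements are twice as wide as the i32 result
        // elements, so the insertion index scales down by 2.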
23069   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
23070       N1.getOpcode() == ISD::BITCAST) {
23071     SDValue N0Src = peekThroughBitcasts(N0);
23072     SDValue N1Src = peekThroughBitcasts(N1);
23073     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
23074     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
23075     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
23076         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
23077       EVT NewVT;
23078       SDLoc DL(N);
23079       SDValue NewIdx;
23080       LLVMContext &Ctx = *DAG.getContext();
23081       ElementCount NumElts = VT.getVectorElementCount();
23082       unsigned EltSizeInBits = VT.getScalarSizeInBits();
23083       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
23084         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
23085         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
23086         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
23087       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
23088         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
23089         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
23090           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
23091                                    NumElts.divideCoefficientBy(Scale));
23092           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
23093         }
23094       }
23095       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
23096         SDValue Res = DAG.getBitcast(NewVT, N0Src);
23097         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
23098         return DAG.getBitcast(VT, Res);
23099       }
23100     }
23101   }
23102 
23103   // Canonicalize insert_subvector dag nodes.
23104   // Example: given Idx1 < Idx0,
23105   // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
23106   // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
23107   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
23108       N1.getValueType() == N0.getOperand(1).getValueType()) {
23109     unsigned OtherIdx = N0.getConstantOperandVal(2);
23110     if (InsIdx < OtherIdx) {
23111       // Swap nodes.
23112       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
23113                                   N0.getOperand(0), N1, N2);
23114       AddToWorklist(NewOp.getNode());
23115       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
23116                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
23117     }
23118   }
23119 
23120   // If the input vector is a concatenation, and the insert replaces
23121   // one of the pieces, we can optimize into a single concat_vectors.
23122   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
23123       N0.getOperand(0).getValueType() == N1.getValueType() &&
23124       N0.getOperand(0).getValueType().isScalableVector() ==
23125           N1.getValueType().isScalableVector()) {
23126     unsigned Factor = N1.getValueType().getVectorMinNumElements();
23127     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
23128     Ops[InsIdx / Factor] = N1;
23129     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23130   }
23131 
23132   // Simplify source operands based on insertion.
23133   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
23134     return SDValue(N, 0);
23135 
23136   return SDValue();
23137 }
23138 
23139 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
23140   SDValue N0 = N->getOperand(0);
23141 
23142   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
23143   if (N0->getOpcode() == ISD::FP16_TO_FP)
23144     return N0->getOperand(0);
23145 
23146   return SDValue();
23147 }
23148 
23149 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
23150   SDValue N0 = N->getOperand(0);
23151 
23152   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
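        // Rationale (informal): FP16_TO_FP reads only the low 16 bits of its
        // integer operand, so zero-extending exactly those bits with an AND is
        // redundant unless the target asks to keep it.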
23153   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
23154     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
23155     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
23156       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
23157                          N0.getOperand(0));
23158     }
23159   }
23160 
23161   return SDValue();
23162 }
23163 
23164 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
23165   SDValue N0 = N->getOperand(0);
23166 
23167   // fold (fp_to_bf16 (bf16_to_fp op)) -> op
23168   if (N0->getOpcode() == ISD::BF16_TO_FP)
23169     return N0->getOperand(0);
23170 
23171   return SDValue();
23172 }
23173 
23174 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
23175   SDValue N0 = N->getOperand(0);
23176   EVT VT = N0.getValueType();
23177   unsigned Opcode = N->getOpcode();
23178 
23179   // VECREDUCE over 1-element vector is just an extract.
23180   if (VT.getVectorElementCount().isScalar()) {
23181     SDLoc dl(N);
23182     SDValue Res =
23183         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
23184                     DAG.getVectorIdxConstant(0, dl));
23185     if (Res.getValueType() != N->getValueType(0))
23186       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
23187     return Res;
23188   }
23189 
23190   // On a boolean vector an and/or reduction is the same as a umin/umax
23191   // reduction. Convert them if the latter is legal while the former isn't.
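        // For example (illustrative): over i8 lanes known to be 0x00 or 0xFF,
        // AND of {0x00, 0xFF} is 0x00, their unsigned minimum, and OR is 0xFF,
        // their unsigned maximum; the sign-bits check below guarantees that
        // every lane has this all-zeros/all-ones form.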
23192   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
23193     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
23194         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
23195     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
23196         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
23197         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
23198       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
23199   }
23200 
23201   // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
23202   // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
23203   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
23204       TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
23205     SDValue Vec = N0.getOperand(0);
23206     SDValue Subvec = N0.getOperand(1);
23207     if ((Opcode == ISD::VECREDUCE_OR &&
23208          (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
23209         (Opcode == ISD::VECREDUCE_AND &&
23210          (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
23211       return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
23212   }
23213 
23214   return SDValue();
23215 }
23216 
23217 SDValue DAGCombiner::visitVPOp(SDNode *N) {
23218   // VP operations in which all vector elements are disabled - either by
23219   // determining that the mask is all false or that the EVL is 0 - can be
23220   // eliminated.
23221   bool AreAllEltsDisabled = false;
23222   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
23223     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
23224   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
23225     AreAllEltsDisabled |=
23226         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
23227 
23228   // This is the only generic VP combine we support for now.
23229   if (!AreAllEltsDisabled)
23230     return SDValue();
23231 
23232   // Binary operations can be replaced by UNDEF.
23233   if (ISD::isVPBinaryOp(N->getOpcode()))
23234     return DAG.getUNDEF(N->getValueType(0));
23235 
23236   // VP Memory operations can be replaced by either the chain (stores) or the
23237   // chain + undef (loads).
23238   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
23239     if (MemSD->writeMem())
23240       return MemSD->getChain();
23241     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
23242   }
23243 
23244   // Reduction operations return the start operand when no elements are active.
23245   if (ISD::isVPReduction(N->getOpcode()))
23246     return N->getOperand(0);
23247 
23248   return SDValue();
23249 }
23250 
23251 /// Returns a vector_shuffle if it is able to transform an AND to a
23252 /// vector_shuffle with the destination vector and a zero vector.
23253 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
23254 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
23255 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
23256   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
23257 
23258   EVT VT = N->getValueType(0);
23259   SDValue LHS = N->getOperand(0);
23260   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
23261   SDLoc DL(N);
23262 
23263   // Make sure we're not running after operation legalization where it
23264   // may have custom lowered the vector shuffles.
23265   if (LegalOperations)
23266     return SDValue();
23267 
23268   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
23269     return SDValue();
23270 
23271   EVT RVT = RHS.getValueType();
23272   unsigned NumElts = RHS.getNumOperands();
23273 
23274   // Attempt to create a valid clear mask by splitting the mask into
23275   // sub-elements and checking that each is all zeros or all ones -
23276   // suitable for shuffle masking.
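        // For example (illustrative, little-endian):
        //   and v2i64 V, <0xFFFFFFFF00000000, 0x00000000FFFFFFFF>
        // split into i32 sub-elements yields the clear mask <4,1,2,7>, i.e.
        //   bitcast (vector_shuffle (v4i32 bitcast V), zero, <4,1,2,7>)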
23277   auto BuildClearMask = [&](int Split) {
23278     int NumSubElts = NumElts * Split;
23279     int NumSubBits = RVT.getScalarSizeInBits() / Split;
23280 
23281     SmallVector<int, 8> Indices;
23282     for (int i = 0; i != NumSubElts; ++i) {
23283       int EltIdx = i / Split;
23284       int SubIdx = i % Split;
23285       SDValue Elt = RHS.getOperand(EltIdx);
23286       // X & undef --> 0 (not undef). So this lane must be converted to choose
23287       // from the zero constant vector (same as if the element had all 0-bits).
23288       if (Elt.isUndef()) {
23289         Indices.push_back(i + NumSubElts);
23290         continue;
23291       }
23292 
23293       APInt Bits;
23294       if (isa<ConstantSDNode>(Elt))
23295         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
23296       else if (isa<ConstantFPSDNode>(Elt))
23297         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
23298       else
23299         return SDValue();
23300 
23301       // Extract the sub element from the constant bit mask.
23302       if (DAG.getDataLayout().isBigEndian())
23303         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
23304       else
23305         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
23306 
23307       if (Bits.isAllOnes())
23308         Indices.push_back(i);
23309       else if (Bits == 0)
23310         Indices.push_back(i + NumSubElts);
23311       else
23312         return SDValue();
23313     }
23314 
23315     // Let's see if the target supports this vector_shuffle.
23316     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
23317     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
23318     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
23319       return SDValue();
23320 
23321     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
23322     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
23323                                                    DAG.getBitcast(ClearVT, LHS),
23324                                                    Zero, Indices));
23325   };
23326 
23327   // Determine maximum split level (byte level masking).
23328   int MaxSplit = 1;
23329   if (RVT.getScalarSizeInBits() % 8 == 0)
23330     MaxSplit = RVT.getScalarSizeInBits() / 8;
23331 
23332   for (int Split = 1; Split <= MaxSplit; ++Split)
23333     if (RVT.getScalarSizeInBits() % Split == 0)
23334       if (SDValue S = BuildClearMask(Split))
23335         return S;
23336 
23337   return SDValue();
23338 }
23339 
23340 /// If a vector binop is performed on splat values, it may be profitable to
23341 /// extract, scalarize, and insert/splat.
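      /// For example (illustrative):
      ///   (v4i32 add (splat X), (splat Y))
      ///     --> splat (i32 add (extract_vector_elt X, i),
      ///                        (extract_vector_elt Y, i))
      /// where i is the common splat index of the two source vectors.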
23342 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
23343                                       const SDLoc &DL) {
23344   SDValue N0 = N->getOperand(0);
23345   SDValue N1 = N->getOperand(1);
23346   unsigned Opcode = N->getOpcode();
23347   EVT VT = N->getValueType(0);
23348   EVT EltVT = VT.getVectorElementType();
23349   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23350 
23351   // TODO: Remove/replace the extract cost check? If the elements are available
23352   //       as scalars, then there may be no extract cost. Should we ask if
23353   //       inserting a scalar back into a vector is cheap instead?
23354   int Index0, Index1;
23355   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
23356   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
23357   if (!Src0 || !Src1 || Index0 != Index1 ||
23358       Src0.getValueType().getVectorElementType() != EltVT ||
23359       Src1.getValueType().getVectorElementType() != EltVT ||
23360       !TLI.isExtractVecEltCheap(VT, Index0) ||
23361       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
23362     return SDValue();
23363 
23364   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
23365   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
23366   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
23367   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
23368 
23369   // If all lanes but 1 are undefined, no need to splat the scalar result.
23370   // TODO: Keep track of undefs and use that info in the general case.
23371   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
23372       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
23373       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
23374     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
23375     // build_vec ..undef, (bo X, Y), undef...
23376     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
23377     Ops[Index0] = ScalarBO;
23378     return DAG.getBuildVector(VT, DL, Ops);
23379   }
23380 
23381   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
23382   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
23383   return DAG.getBuildVector(VT, DL, Ops);
23384 }
23385 
23386 /// Visit a binary vector operation, like ADD.
23387 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
23388   EVT VT = N->getValueType(0);
23389   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
23390 
23391   SDValue LHS = N->getOperand(0);
23392   SDValue RHS = N->getOperand(1);
23393   unsigned Opcode = N->getOpcode();
23394   SDNodeFlags Flags = N->getFlags();
23395 
23396   // Move unary shuffles with identical masks after a vector binop:
23397   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
23398   //   --> shuffle (VBinOp A, B), Undef, Mask
23399   // This does not require type legality checks because we are creating the
23400   // same types of operations that are in the original sequence. We do have to
23401   // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
23402   // though. This code is adapted from the identical transform in instcombine.
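        // For example (illustrative):
        //   add (shuffle A, undef, <1,1,3,3>), (shuffle B, undef, <1,1,3,3>)
        //     --> shuffle (add A, B), undef, <1,1,3,3>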
23403   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
23404       Opcode != ISD::UREM && Opcode != ISD::SREM &&
23405       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
23406     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
23407     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
23408     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
23409         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
23410         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
23411       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
23412                                      RHS.getOperand(0), Flags);
23413       SDValue UndefV = LHS.getOperand(1);
23414       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
23415     }
23416 
23417     // Try to sink a splat shuffle after a binop with a uniform constant.
23418     // This is limited to cases where neither the shuffle nor the constant has
23419     // undefined elements because that could be poison-unsafe or inhibit
23420     // demanded elements analysis. It is further limited to not change a splat
23421     // of an inserted scalar because that may be optimized better by
23422     // load-folding or other target-specific behaviors.
23423     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
23424         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
23425         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23426       // binop (splat X), (splat C) --> splat (binop X, C)
23427       SDValue X = Shuf0->getOperand(0);
23428       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
23429       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23430                                   Shuf0->getMask());
23431     }
23432     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
23433         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
23434         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
23435       // binop (splat C), (splat X) --> splat (binop C, X)
23436       SDValue X = Shuf1->getOperand(0);
23437       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
23438       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
23439                                   Shuf1->getMask());
23440     }
23441   }
23442 
23443   // The following pattern is likely to emerge with vector reduction ops. Moving
23444   // the binary operation ahead of insertion may allow using a narrower vector
23445   // instruction that has better performance than the wide version of the op:
23446   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
23447   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
23448       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
23449       LHS.getOperand(2) == RHS.getOperand(2) &&
23450       (LHS.hasOneUse() || RHS.hasOneUse())) {
23451     SDValue X = LHS.getOperand(1);
23452     SDValue Y = RHS.getOperand(1);
23453     SDValue Z = LHS.getOperand(2);
23454     EVT NarrowVT = X.getValueType();
23455     if (NarrowVT == Y.getValueType() &&
23456         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
23457                                               LegalOperations)) {
23458       // (binop undef, undef) may not return undef, so compute that result.
23459       SDValue VecC =
23460           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
23461       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
23462       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
23463     }
23464   }
23465 
23466   // Make sure all but the first op are undef or constant.
23467   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
23468     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
23469            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
23470              return Op.isUndef() ||
23471                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
23472            });
23473   };
23474 
23475   // The following pattern is likely to emerge with vector reduction ops. Moving
23476   // the binary operation ahead of the concat may allow using a narrower vector
23477   // instruction that has better performance than the wide version of the op:
23478   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
23479   //   concat (VBinOp X, Y), VecC
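        // For example (illustrative), with constant operands C0 and C1:
        //   add (concat X, C0), (concat Y, C1)
        //     --> concat (add X, Y), (add C0, C1)
        // where the trailing add constant-folds away.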
23480   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
23481       (LHS.hasOneUse() || RHS.hasOneUse())) {
23482     EVT NarrowVT = LHS.getOperand(0).getValueType();
23483     if (NarrowVT == RHS.getOperand(0).getValueType() &&
23484         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
23485       unsigned NumOperands = LHS.getNumOperands();
23486       SmallVector<SDValue, 4> ConcatOps;
23487       for (unsigned i = 0; i != NumOperands; ++i) {
23488         // This constant folds for operands 1 and up.
23489         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
23490                                         RHS.getOperand(i)));
23491       }
23492 
23493       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
23494     }
23495   }
23496 
23497   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
23498     return V;
23499 
23500   return SDValue();
23501 }
23502 
23503 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
23504                                     SDValue N2) {
23505   assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
23506 
23507   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
23508                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
23509 
23510   // If we got a simplified select_cc node back from SimplifySelectCC, then
23511   // break it down into a new SETCC node, and a new SELECT node, and then return
23512   // the SELECT node, since we were called with a SELECT node.
23513   if (SCC.getNode()) {
23514     // Check to see if we got a select_cc back (to turn into setcc/select).
23515     // Otherwise, just return whatever node we got back, like fabs.
23516     if (SCC.getOpcode() == ISD::SELECT_CC) {
23517       const SDNodeFlags Flags = N0->getFlags();
23518       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
23519                                   N0.getValueType(),
23520                                   SCC.getOperand(0), SCC.getOperand(1),
23521                                   SCC.getOperand(4), Flags);
23522       AddToWorklist(SETCC.getNode());
23523       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
23524                                          SCC.getOperand(2), SCC.getOperand(3));
23525       SelectNode->setFlags(Flags);
23526       return SelectNode;
23527     }
23528 
23529     return SCC;
23530   }
23531   return SDValue();
23532 }
23533 
23534 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
23535 /// being selected between, see if we can simplify the select.  Callers of this
23536 /// should assume that TheSelect is deleted if this returns true.  As such, they
23537 /// should return the appropriate thing (e.g. the node) back to the top-level of
23538 /// the DAG combiner loop to avoid it being looked at.
23539 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
23540                                     SDValue RHS) {
23541   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23542   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
23543   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
23544     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
23545       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
23546       SDValue Sqrt = RHS;
23547       ISD::CondCode CC;
23548       SDValue CmpLHS;
23549       const ConstantFPSDNode *Zero = nullptr;
23550 
23551       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
23552         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
23553         CmpLHS = TheSelect->getOperand(0);
23554         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
23555       } else {
23556         // SELECT or VSELECT
23557         SDValue Cmp = TheSelect->getOperand(0);
23558         if (Cmp.getOpcode() == ISD::SETCC) {
23559           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
23560           CmpLHS = Cmp.getOperand(0);
23561           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
23562         }
23563       }
23564       if (Zero && Zero->isZero() &&
23565           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
23566           CC == ISD::SETULT || CC == ISD::SETLT)) {
23567         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
23568         CombineTo(TheSelect, Sqrt);
23569         return true;
23570       }
23571     }
23572   }
23573   // Cannot simplify a select with a vector condition.
23574   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
23575 
23576   // If this is a select from two identical things, try to pull the operation
23577   // through the select.
23578   if (LHS.getOpcode() != RHS.getOpcode() ||
23579       !LHS.hasOneUse() || !RHS.hasOneUse())
23580     return false;
23581 
23582   // If this is a load and the token chain is identical, replace the select
23583   // of two loads with a load through a select of the address to load from.
23584   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
23585   // constants have been dropped into the constant pool.
23586   if (LHS.getOpcode() == ISD::LOAD) {
23587     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
23588     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
23589 
23590     // Token chains must be identical.
23591     if (LHS.getOperand(0) != RHS.getOperand(0) ||
23592         // Do not let this transformation reduce the number of volatile loads.
23593         // Be conservative for atomics for the moment
23594         // TODO: This does appear to be legal for unordered atomics (see D66309)
23595         !LLD->isSimple() || !RLD->isSimple() ||
23596         // FIXME: If either is a pre/post inc/dec load,
23597         // we'd need to split out the address adjustment.
23598         LLD->isIndexed() || RLD->isIndexed() ||
23599         // If this is an EXTLOAD, the VT's must match.
23600         LLD->getMemoryVT() != RLD->getMemoryVT() ||
23601         // If this is an EXTLOAD, the kind of extension must match.
23602         (LLD->getExtensionType() != RLD->getExtensionType() &&
23603          // The only exception is if one of the extensions is anyext.
23604          LLD->getExtensionType() != ISD::EXTLOAD &&
23605          RLD->getExtensionType() != ISD::EXTLOAD) ||
23606         // FIXME: this discards src value information.  This is
23607         // over-conservative. It would be beneficial to be able to remember
23608         // both potential memory locations.  Since we are discarding
23609         // src value info, don't do the transformation if the memory
23610         // locations are not in the default address space.
23611         LLD->getPointerInfo().getAddrSpace() != 0 ||
23612         RLD->getPointerInfo().getAddrSpace() != 0 ||
23613         // We can't produce a CMOV of a TargetFrameIndex since we won't
23614         // generate the address generation required.
23615         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23616         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
23617         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
23618                                       LLD->getBasePtr().getValueType()))
23619       return false;
23620 
23621     // The loads must not depend on one another.
23622     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
23623       return false;
23624 
23625     // Check that the select condition doesn't reach either load.  If so,
23626     // folding this will induce a cycle into the DAG.  If not, this is safe to
23627     // xform, so create a select of the addresses.
23628 
23629     SmallPtrSet<const SDNode *, 32> Visited;
23630     SmallVector<const SDNode *, 16> Worklist;
23631 
23632     // Always fail if LLD and RLD are not independent. TheSelect is a
23633     // predecessor to all Nodes in question so we need not search past it.
23634 
23635     Visited.insert(TheSelect);
23636     Worklist.push_back(LLD);
23637     Worklist.push_back(RLD);
23638 
23639     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
23640         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
23641       return false;
23642 
23643     SDValue Addr;
23644     if (TheSelect->getOpcode() == ISD::SELECT) {
23645       // We cannot do this optimization if any pair of {RLD, LLD} is a
23646       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
23647       // Loads, we only need to check if CondNode is a successor to one of the
23648       // loads. We can further avoid this if there's no use of their chain
23649       // value.
23650       SDNode *CondNode = TheSelect->getOperand(0).getNode();
23651       Worklist.push_back(CondNode);
23652 
23653       if ((LLD->hasAnyUseOfValue(1) &&
23654            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23655           (RLD->hasAnyUseOfValue(1) &&
23656            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23657         return false;
23658 
23659       Addr = DAG.getSelect(SDLoc(TheSelect),
23660                            LLD->getBasePtr().getValueType(),
23661                            TheSelect->getOperand(0), LLD->getBasePtr(),
23662                            RLD->getBasePtr());
23663     } else {  // Otherwise SELECT_CC
23664       // We cannot do this optimization if any pair of {RLD, LLD} is a
23665       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
23666       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
23667       // one of the loads. We can further avoid this if there's no use of their
23668       // chain value.
23669 
23670       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
23671       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
23672       Worklist.push_back(CondLHS);
23673       Worklist.push_back(CondRHS);
23674 
23675       if ((LLD->hasAnyUseOfValue(1) &&
23676            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
23677           (RLD->hasAnyUseOfValue(1) &&
23678            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
23679         return false;
23680 
23681       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
23682                          LLD->getBasePtr().getValueType(),
23683                          TheSelect->getOperand(0),
23684                          TheSelect->getOperand(1),
23685                          LLD->getBasePtr(), RLD->getBasePtr(),
23686                          TheSelect->getOperand(4));
23687     }
23688 
23689     SDValue Load;
23690     // It is safe to replace the two loads if they have different alignments,
23691     // but the new load must be the minimum (most restrictive) alignment of the
23692     // inputs.
23693     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
23694     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
23695     if (!RLD->isInvariant())
23696       MMOFlags &= ~MachineMemOperand::MOInvariant;
23697     if (!RLD->isDereferenceable())
23698       MMOFlags &= ~MachineMemOperand::MODereferenceable;
23699     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
23700       // FIXME: Discards pointer and AA info.
23701       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
23702                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
23703                          MMOFlags);
23704     } else {
23705       // FIXME: Discards pointer and AA info.
23706       Load = DAG.getExtLoad(
23707           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
23708                                                   : LLD->getExtensionType(),
23709           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
23710           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
23711     }
23712 
23713     // Users of the select now use the result of the load.
23714     CombineTo(TheSelect, Load);
23715 
23716     // Users of the old loads now use the new load's chain.  We know the
23717     // old-load value is dead now.
23718     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
23719     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
23720     return true;
23721   }
23722 
23723   return false;
23724 }
23725 
23726 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
23727 /// bitwise 'and'.
23728 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
23729                                             SDValue N1, SDValue N2, SDValue N3,
23730                                             ISD::CondCode CC) {
23731   // If this is a select where the false operand is zero and the compare is a
23732   // check of the sign bit, see if we can perform the "gzip trick":
23733   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
23734   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
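        // For example (illustrative, i32): when X < 0, (sra X, 31) is all-ones
        // and the 'and' yields A; otherwise it is zero and the 'and' yields 0.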
23735   EVT XType = N0.getValueType();
23736   EVT AType = N2.getValueType();
23737   if (!isNullConstant(N3) || !XType.bitsGE(AType))
23738     return SDValue();
23739 
23740   // If the comparison is testing for a positive value, we have to invert
23741   // the sign bit mask, so only do that transform if the target has a bitwise
23742   // 'and not' instruction (the invert is free).
23743   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
23744     // (X > -1) ? A : 0
23745     // (X >  0) ? X : 0 <-- This is canonical signed max.
23746     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
23747       return SDValue();
23748   } else if (CC == ISD::SETLT) {
23749     // (X <  0) ? A : 0
23750     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
23751     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
23752       return SDValue();
23753   } else {
23754     return SDValue();
23755   }
23756 
23757   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
23758   // constant.
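        // For example (illustrative, i32 with A == 8): logBase2(8) == 3, so
        //   select_cc setlt X, 0, 8, 0 --> and (srl X, 28), 8
        // places the sign bit on bit 3, yielding 8 exactly when X < 0.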
23759   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
23760   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23761   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
23762     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
23763     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
23764       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23765       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
23766       AddToWorklist(Shift.getNode());
23767 
23768       if (XType.bitsGT(AType)) {
23769         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23770         AddToWorklist(Shift.getNode());
23771       }
23772 
23773       if (CC == ISD::SETGT)
23774         Shift = DAG.getNOT(DL, Shift, AType);
23775 
23776       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23777     }
23778   }
23779 
23780   unsigned ShCt = XType.getSizeInBits() - 1;
23781   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
23782     return SDValue();
23783 
23784   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
23785   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
23786   AddToWorklist(Shift.getNode());
23787 
23788   if (XType.bitsGT(AType)) {
23789     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
23790     AddToWorklist(Shift.getNode());
23791   }
23792 
23793   if (CC == ISD::SETGT)
23794     Shift = DAG.getNOT(DL, Shift, AType);
23795 
23796   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
23797 }
23798 
23799 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
23800 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
23801   SDValue N0 = N->getOperand(0);
23802   SDValue N1 = N->getOperand(1);
23803   SDValue N2 = N->getOperand(2);
23804   EVT VT = N->getValueType(0);
23805   SDLoc DL(N);
23806 
23807   unsigned BinOpc = N1.getOpcode();
23808   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
23809     return SDValue();
23810 
23811   // The use checks are intentionally on SDNode because we may be dealing
23812   // with opcodes that produce more than one SDValue.
23813   // TODO: Do we really need to check N0 (the condition operand of the select)?
23814   //       But removing that clause could cause an infinite loop...
23815   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
23816     return SDValue();
23817 
23818   // Binops may include opcodes that return multiple values, so all values
23819   // must be created/propagated from the newly created binops below.
23820   SDVTList OpVTs = N1->getVTList();
23821 
23822   // Fold select(cond, binop(x, y), binop(z, y))
23823   //  --> binop(select(cond, x, z), y)
23824   if (N1.getOperand(1) == N2.getOperand(1)) {
23825     SDValue NewSel =
23826         DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
23827     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
23828     NewBinOp->setFlags(N1->getFlags());
23829     NewBinOp->intersectFlagsWith(N2->getFlags());
23830     return NewBinOp;
23831   }
23832 
23833   // Fold select(cond, binop(x, y), binop(x, z))
23834   //  --> binop(x, select(cond, y, z))
23835   // Second op VT might be different (e.g. shift amount type)
23836   if (N1.getOperand(0) == N2.getOperand(0) &&
23837       VT == N1.getOperand(1).getValueType() &&
23838       VT == N2.getOperand(1).getValueType()) {
23839     SDValue NewSel =
23840         DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
23841     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
23842     NewBinOp->setFlags(N1->getFlags());
23843     NewBinOp->intersectFlagsWith(N2->getFlags());
23844     return NewBinOp;
23845   }
23846 
23847   // TODO: Handle isCommutativeBinOp patterns as well?
23848   return SDValue();
23849 }
23850 
23851 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
23852 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
23853   SDValue N0 = N->getOperand(0);
23854   EVT VT = N->getValueType(0);
23855   bool IsFabs = N->getOpcode() == ISD::FABS;
23856   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
23857 
23858   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
23859     return SDValue();
23860 
23861   SDValue Int = N0.getOperand(0);
23862   EVT IntVT = Int.getValueType();
23863 
23864   // The operand to the cast should be a scalar integer.
23865   if (!IntVT.isInteger() || IntVT.isVector())
23866     return SDValue();
23867 
23868   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
23869   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
23870   APInt SignMask;
23871   if (N0.getValueType().isVector()) {
23872     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
23873     // 0x7f...) per element and splat it.
23874     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
23875     if (IsFabs)
23876       SignMask = ~SignMask;
23877     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
23878   } else {
23879     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
23880     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
23881     if (IsFabs)
23882       SignMask = ~SignMask;
23883   }
23884   SDLoc DL(N0);
23885   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
23886                     DAG.getConstant(SignMask, DL, IntVT));
23887   AddToWorklist(Int.getNode());
23888   return DAG.getBitcast(VT, Int);
23889 }
23890 
23891 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
23892 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
23893 /// in it. This may be a win when the constant is not otherwise available
23894 /// because it replaces two constant pool loads with one.
23895 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
23896     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
23897     ISD::CondCode CC) {
23898   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
23899     return SDValue();
23900 
23901   // If we are before legalize types, we want the other legalization to happen
23902   // first (for example, to avoid messing with soft float).
23903   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
23904   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
23905   EVT VT = N2.getValueType();
23906   if (!TV || !FV || !TLI.isTypeLegal(VT))
23907     return SDValue();
23908 
23909   // If a constant can be materialized without loads, this does not make sense.
23910   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
23911       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
23912       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
23913     return SDValue();
23914 
23915   // If both constants have multiple uses, then we won't need to do an extra
23916   // load. The values are likely around in registers for other users.
23917   if (!TV->hasOneUse() && !FV->hasOneUse())
23918     return SDValue();
23919 
23920   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
23921                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
23922   Type *FPTy = Elts[0]->getType();
23923   const DataLayout &TD = DAG.getDataLayout();
23924 
23925   // Create a ConstantArray of the two constants.
23926   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
23927   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
23928                                       TD.getPrefTypeAlign(FPTy));
23929   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
23930 
23931   // Get offsets to the 0 and 1 elements of the array, so we can select between
23932   // them.
23933   SDValue Zero = DAG.getIntPtrConstant(0, DL);
23934   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
23935   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
23936   SDValue Cond =
23937       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
23938   AddToWorklist(Cond.getNode());
23939   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
23940   AddToWorklist(CstOffset.getNode());
23941   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
23942   AddToWorklist(CPIdx.getNode());
23943   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
23944                      MachinePointerInfo::getConstantPool(
23945                          DAG.getMachineFunction()), Alignment);
23946 }
23947 
23948 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
23949 /// where 'cond' is the comparison specified by CC.
23950 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
23951                                       SDValue N2, SDValue N3, ISD::CondCode CC,
23952                                       bool NotExtCompare) {
23953   // (x ? y : y) -> y.
23954   if (N2 == N3) return N2;
23955 
23956   EVT CmpOpVT = N0.getValueType();
23957   EVT CmpResVT = getSetCCResultType(CmpOpVT);
23958   EVT VT = N2.getValueType();
23959   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
23960   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
23961   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
23962 
23963   // Determine if the condition we're dealing with is constant.
23964   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
23965     AddToWorklist(SCC.getNode());
23966     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
23967       // fold select_cc true, x, y -> x
23968       // fold select_cc false, x, y -> y
23969       return !(SCCC->isZero()) ? N2 : N3;
23970     }
23971   }
23972 
23973   if (SDValue V =
23974           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
23975     return V;
23976 
23977   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
23978     return V;
23979 
23980   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
23981   // where y has a single bit set.
23982   // A plaintext description would be: we can turn the SELECT_CC into an AND
23983   // when the condition can be materialized as an all-ones register.  Any
23984   // single bit-test can be materialized as an all-ones register with
23985   // shift-left and shift-right-arith.
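        // For example (illustrative, i32):
        //   select_cc seteq (and X, 4), 0, 0, A
        // becomes (shl X, 29) to move bit 2 into the sign bit, (sra _, 31) to
        // smear it into all-ones or zero, and an 'and' with A.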
23986   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
23987       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
23988     SDValue AndLHS = N0->getOperand(0);
23989     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
23990     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
23991       // Shift the tested bit over the sign bit.
23992       const APInt &AndMask = ConstAndRHS->getAPIntValue();
23993       unsigned ShCt = AndMask.getBitWidth() - 1;
23994       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
23995         SDValue ShlAmt =
23996           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
23997                           getShiftAmountTy(AndLHS.getValueType()));
23998         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
23999 
24000         // Now arithmetic right shift it all the way over, so the result is
24001         // either all-ones, or zero.
24002         SDValue ShrAmt =
24003           DAG.getConstant(ShCt, SDLoc(Shl),
24004                           getShiftAmountTy(Shl.getValueType()));
24005         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
24006 
24007         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
24008       }
24009     }
24010   }
24011 
24012   // fold select C, 16, 0 -> shl C, 4
24013   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
24014   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
24015 
24016   if ((Fold || Swap) &&
24017       TLI.getBooleanContents(CmpOpVT) ==
24018           TargetLowering::ZeroOrOneBooleanContent &&
24019       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
24020 
24021     if (Swap) {
24022       CC = ISD::getSetCCInverse(CC, CmpOpVT);
24023       std::swap(N2C, N3C);
24024     }
24025 
24026     // If the caller doesn't want us to simplify this into a zext of a compare,
24027     // don't do it.
24028     if (NotExtCompare && N2C->isOne())
24029       return SDValue();
24030 
24031     SDValue Temp, SCC;
24032     // zext (setcc n0, n1)
24033     if (LegalTypes) {
24034       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
24035       if (VT.bitsLT(SCC.getValueType()))
24036         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
24037       else
24038         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
24039     } else {
24040       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
24041       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
24042     }
24043 
24044     AddToWorklist(SCC.getNode());
24045     AddToWorklist(Temp.getNode());
24046 
24047     if (N2C->isOne())
24048       return Temp;
24049 
24050     unsigned ShCt = N2C->getAPIntValue().logBase2();
24051     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
24052       return SDValue();
24053 
24054     // shl setcc result by log2 n2c
24055     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
24056                        DAG.getConstant(ShCt, SDLoc(Temp),
24057                                        getShiftAmountTy(Temp.getValueType())));
24058   }
24059 
24060   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
24061   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
24062   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
24063   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
24064   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
24065   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
24066   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
24067   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
24068   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
24069     SDValue ValueOnZero = N2;
24070     SDValue Count = N3;
24071     // If the condition is NE instead of EQ, swap the operands.
24072     if (CC == ISD::SETNE)
24073       std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bit width of the
    // type.
24075     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
24076       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
24077         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
24078         // legal, combine to just cttz.
24079         if ((Count.getOpcode() == ISD::CTTZ ||
24080              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
24081             N0 == Count.getOperand(0) &&
24082             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
24083           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
24084         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
24085         // legal, combine to just ctlz.
24086         if ((Count.getOpcode() == ISD::CTLZ ||
24087              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
24088             N0 == Count.getOperand(0) &&
24089             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
24090           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
24091       }
24092     }
24093   }
24094 
24095   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
24096   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
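  // For example, for i32 (select_cc setgt X, -1, 5, -6): the ashr yields 0 for
  // non-negative X and -1 otherwise, and (xor 0, 5) == 5 while
  // (xor -1, 5) == ~5 == -6.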
24097   if (!NotExtCompare && N1C && N2C && N3C &&
24098       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
24099       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
24100        (N1C->isZero() && CC == ISD::SETLT)) &&
24101       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
24102     SDValue ASR = DAG.getNode(
24103         ISD::SRA, DL, CmpOpVT, N0,
24104         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
24105     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
24106                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
24107   }
24108 
24109   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
24110     return S;
24111   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
24112     return S;
24113 
24114   return SDValue();
24115 }
24116 
/// This is a thin wrapper around TargetLowering::SimplifySetCC that supplies
/// the combiner's DAGCombinerInfo.
24118 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
24119                                    ISD::CondCode Cond, const SDLoc &DL,
24120                                    bool foldBooleans) {
24121   TargetLowering::DAGCombinerInfo
24122     DagCombineInfo(DAG, Level, false, this);
24123   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
24124 }
24125 
/// Given an ISD::SDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
24129 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
24130 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
24133   if (DAG.getMachineFunction().getFunction().hasMinSize())
24134     return SDValue();
24135 
24136   SmallVector<SDNode *, 8> Built;
24137   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
24138     for (SDNode *N : Built)
24139       AddToWorklist(N);
24140     return S;
24141   }
24142 
24143   return SDValue();
24144 }
24145 
/// Given an ISD::SDIV node expressing a divide by a constant power of 2,
/// return a DAG expression that will generate the same value by right
/// shifting.
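/// For instance, for i32 the expansion of X sdiv 4 is equivalent to
/// (X + ((X >>s 31) >>u 30)) >>s 2, where the bias term rounds negative
/// dividends toward zero; TLI.BuildSDIVPow2 picks the exact per-target form.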
24148 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
24149   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
24150   if (!C)
24151     return SDValue();
24152 
24153   // Avoid division by zero.
24154   if (C->isZero())
24155     return SDValue();
24156 
24157   SmallVector<SDNode *, 8> Built;
24158   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
24159     for (SDNode *N : Built)
24160       AddToWorklist(N);
24161     return S;
24162   }
24163 
24164   return SDValue();
24165 }
24166 
24167 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
24168 /// expression that will generate the same value by multiplying by a magic
24169 /// number.
24170 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
24171 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
24174   if (DAG.getMachineFunction().getFunction().hasMinSize())
24175     return SDValue();
24176 
24177   SmallVector<SDNode *, 8> Built;
24178   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
24179     for (SDNode *N : Built)
24180       AddToWorklist(N);
24181     return S;
24182   }
24183 
24184   return SDValue();
24185 }
24186 
/// Given an ISD::SREM node expressing a remainder by a constant power of 2,
/// return a DAG expression that will generate the same value.
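/// One possible expansion for i32 X srem 4 is
/// X - ((X + ((X >>s 31) >>u 30)) & -4), i.e. subtracting off the
/// rounded-toward-zero multiple of 4; TLI.BuildSREMPow2 picks the exact form.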
24189 SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
24190   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
24191   if (!C)
24192     return SDValue();
24193 
24194   // Avoid division by zero.
24195   if (C->isZero())
24196     return SDValue();
24197 
24198   SmallVector<SDNode *, 8> Built;
24199   if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
24200     for (SDNode *N : Built)
24201       AddToWorklist(N);
24202     return S;
24203   }
24204 
24205   return SDValue();
24206 }
24207 
/// Determines the LogBase2 value for a non-zero input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
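/// For example, for an i32 value of 8: ctlz(8) == 28, so
/// LogBase2(8) == (32 - 1) - 28 == 3.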
24210 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
24211   EVT VT = V.getValueType();
24212   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
24213   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
24214   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
24215   return LogBase2;
24216 }
24217 
24218 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24219 /// For the reciprocal, we need to find the zero of the function:
24220 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
24221 ///     =>
24222 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
24223 ///     does not require additional intermediate precision]
24224 /// For the last iteration, put numerator N into it to gain more precision:
24225 ///   Result = N X_i + X_i (N - N A X_i)
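/// Newton's method converges quadratically, roughly doubling the number of
/// correct significand bits per iteration, so a ~12-bit hardware estimate
/// typically reaches full f32 precision after a single refinement step.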
24226 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
24227                                       SDNodeFlags Flags) {
24228   if (LegalDAG)
24229     return SDValue();
24230 
24231   // TODO: Handle extended types?
24232   EVT VT = Op.getValueType();
24233   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
24234       VT.getScalarType() != MVT::f64)
24235     return SDValue();
24236 
24237   // If estimates are explicitly disabled for this function, we're done.
24238   MachineFunction &MF = DAG.getMachineFunction();
24239   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
24240   if (Enabled == TLI.ReciprocalEstimate::Disabled)
24241     return SDValue();
24242 
24243   // Estimates may be explicitly enabled for this type with a custom number of
24244   // refinement steps.
24245   int Iterations = TLI.getDivRefinementSteps(VT, MF);
24246   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
24247     AddToWorklist(Est.getNode());
24248 
24249     SDLoc DL(Op);
24250     if (Iterations) {
24251       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
24252 
24253       // Newton iterations: Est = Est + Est (N - Arg * Est)
24254       // If this is the last iteration, also multiply by the numerator.
24255       for (int i = 0; i < Iterations; ++i) {
24256         SDValue MulEst = Est;
24257 
24258         if (i == Iterations - 1) {
24259           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
24260           AddToWorklist(MulEst.getNode());
24261         }
24262 
24263         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
24264         AddToWorklist(NewEst.getNode());
24265 
24266         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
24267                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
24268         AddToWorklist(NewEst.getNode());
24269 
24270         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
24271         AddToWorklist(NewEst.getNode());
24272 
24273         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
24274         AddToWorklist(Est.getNode());
24275       }
24276     } else {
24277       // If no iterations are available, multiply with N.
24278       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
24279       AddToWorklist(Est.getNode());
24280     }
24281 
24282     return Est;
24283   }
24284 
24285   return SDValue();
24286 }
24287 
24288 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24289 /// For the reciprocal sqrt, we need to find the zero of the function:
24290 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24291 ///     =>
24292 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
24293 /// As a result, we precompute A/2 prior to the iteration loop.
24294 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
24295                                          unsigned Iterations,
24296                                          SDNodeFlags Flags, bool Reciprocal) {
24297   EVT VT = Arg.getValueType();
24298   SDLoc DL(Arg);
24299   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
24300 
24301   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
24302   // this entire sequence requires only one FP constant.
24303   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
24304   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
24305 
24306   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
24307   for (unsigned i = 0; i < Iterations; ++i) {
24308     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
24309     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
24310     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
24311     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
24312   }
24313 
24314   // If non-reciprocal square root is requested, multiply the result by Arg.
24315   if (!Reciprocal)
24316     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
24317 
24318   return Est;
24319 }
24320 
24321 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
24322 /// For the reciprocal sqrt, we need to find the zero of the function:
24323 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
24324 ///     =>
24325 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
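/// Note that (-0.5 X) (A X^2 - 3.0) == 1.5 X - 0.5 A X^3, the same polynomial
/// as the one-constant form X (1.5 - A X^2 / 2), merely refactored so that no
/// A/2 precomputation is needed.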
24326 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
24327                                          unsigned Iterations,
24328                                          SDNodeFlags Flags, bool Reciprocal) {
24329   EVT VT = Arg.getValueType();
24330   SDLoc DL(Arg);
24331   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
24332   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
24333 
24334   // This routine must enter the loop below to work correctly
24335   // when (Reciprocal == false).
  assert(Iterations > 0 && "Expected at least one refinement iteration");
24337 
24338   // Newton iterations for reciprocal square root:
24339   // E = (E * -0.5) * ((A * E) * E + -3.0)
24340   for (unsigned i = 0; i < Iterations; ++i) {
24341     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
24342     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
24343     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
24344 
24345     // When calculating a square root at the last iteration build:
24346     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
24347     // (notice a common subexpression)
24348     SDValue LHS;
24349     if (Reciprocal || (i + 1) < Iterations) {
24350       // RSQRT: LHS = (E * -0.5)
24351       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
24352     } else {
24353       // SQRT: LHS = (A * E) * -0.5
24354       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
24355     }
24356 
24357     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
24358   }
24359 
24360   return Est;
24361 }
24362 
24363 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
24364 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
24365 /// Op can be zero.
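/// (For an input of exactly 0.0, rsqrt(0) is +Inf and 0 * +Inf is NaN rather
/// than the expected sqrt(0) == 0, hence the select emitted below for
/// zero/denormal inputs.)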
24366 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
24367                                            bool Reciprocal) {
24368   if (LegalDAG)
24369     return SDValue();
24370 
24371   // TODO: Handle extended types?
24372   EVT VT = Op.getValueType();
24373   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
24374       VT.getScalarType() != MVT::f64)
24375     return SDValue();
24376 
24377   // If estimates are explicitly disabled for this function, we're done.
24378   MachineFunction &MF = DAG.getMachineFunction();
24379   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
24380   if (Enabled == TLI.ReciprocalEstimate::Disabled)
24381     return SDValue();
24382 
24383   // Estimates may be explicitly enabled for this type with a custom number of
24384   // refinement steps.
24385   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
24386 
24387   bool UseOneConstNR = false;
24388   if (SDValue Est =
24389       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
24390                           Reciprocal)) {
24391     AddToWorklist(Est.getNode());
24392 
24393     if (Iterations)
24394       Est = UseOneConstNR
24395             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
24396             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
24397     if (!Reciprocal) {
24398       SDLoc DL(Op);
      // Try the target-specific test first.
      SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));

      // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or the value provided by
      // the target for those cases.
24405       Est = DAG.getNode(
24406           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
24407           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
24408     }
24409     return Est;
24410   }
24411 
24412   return SDValue();
24413 }
24414 
24415 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24416   return buildSqrtEstimateImpl(Op, Flags, true);
24417 }
24418 
24419 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
24420   return buildSqrtEstimateImpl(Op, Flags, false);
24421 }
24422 
24423 /// Return true if there is any possibility that the two addresses overlap.
24424 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
24425 
24426   struct MemUseCharacteristics {
24427     bool IsVolatile;
24428     bool IsAtomic;
24429     SDValue BasePtr;
24430     int64_t Offset;
24431     Optional<int64_t> NumBytes;
24432     MachineMemOperand *MMO;
24433   };
24434 
24435   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
24436     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) {
        if (LSN->getAddressingMode() == ISD::PRE_INC)
          Offset = C->getSExtValue();
        else if (LSN->getAddressingMode() == ISD::PRE_DEC)
          Offset = -1 * C->getSExtValue();
      }
24444       uint64_t Size =
24445           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
24446       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
24447               Offset /*base offset*/,
24448               Optional<int64_t>(Size),
24449               LSN->getMemOperand()};
24450     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
24452       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
24453               (LN->hasOffset()) ? LN->getOffset() : 0,
24454               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
24455                                 : Optional<int64_t>(),
24456               (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isVolatile*/, /*isAtomic*/ false, SDValue(),
            (int64_t)0 /*offset*/, Optional<int64_t>() /*size*/,
            (MachineMemOperand *)nullptr};
24461   };
24462 
24463   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
24464                         MUC1 = getCharacteristics(Op1);
24465 
24466   // If they are to the same address, then they must be aliases.
24467   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
24468       MUC0.Offset == MUC1.Offset)
24469     return true;
24470 
24471   // If they are both volatile then they cannot be reordered.
24472   if (MUC0.IsVolatile && MUC1.IsVolatile)
24473     return true;
24474 
  // Be conservative about atomics for the moment.
24476   // TODO: This is way overconservative for unordered atomics (see D66309)
24477   if (MUC0.IsAtomic && MUC1.IsAtomic)
24478     return true;
24479 
24480   if (MUC0.MMO && MUC1.MMO) {
24481     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24482         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24483       return false;
24484   }
24485 
24486   // Try to prove that there is aliasing, or that there is no aliasing. Either
24487   // way, we can return now. If nothing can be proved, proceed with more tests.
24488   bool IsAlias;
24489   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
24490                                        DAG, IsAlias))
24491     return IsAlias;
24492 
24493   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
24494   // either are not known.
24495   if (!MUC0.MMO || !MUC1.MMO)
24496     return true;
24497 
  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. This check should really use the equivalent of mayWrite, but
  // that only matters for memory nodes other than load/store.
24501   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
24502       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
24503     return false;
24504 
24505   // If we know required SrcValue1 and SrcValue2 have relatively large
24506   // alignment compared to the size and offset of the access, we may be able
24507   // to prove they do not alias. This check is conservative for now to catch
24508   // cases created by splitting vector types, it only works when the offsets are
24509   // multiples of the size of the data.
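  // For example, two 4-byte accesses whose bases share an 8-byte alignment and
  // whose offsets within that alignment are 0 and 4 occupy disjoint windows
  // and cannot overlap.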
24510   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
24511   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
24512   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
24513   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
24514   auto &Size0 = MUC0.NumBytes;
24515   auto &Size1 = MUC1.NumBytes;
24516   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
24517       Size0 && Size1 && *Size0 == *Size1 && OrigAlignment0 > *Size0 &&
24518       SrcValOffset0 % *Size0 == 0 && SrcValOffset1 % *Size1 == 0) {
24519     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
24520     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
24521 
24522     // There is no overlap between these relatively aligned accesses of
24523     // similar size. Return no alias.
24524     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
24525       return false;
24526   }
24527 
24528   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
24529                    ? CombinerGlobalAA
24530                    : DAG.getSubtarget().useAA();
24531 #ifndef NDEBUG
24532   if (CombinerAAOnlyFunc.getNumOccurrences() &&
24533       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
24534     UseAA = false;
24535 #endif
24536 
24537   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
24538       Size1) {
24539     // Use alias analysis information.
24540     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
24541     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
24542     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
24543     if (AA->isNoAlias(
24544             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
24545                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
24546             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
24547                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
24548       return false;
24549   }
24550 
24551   // Otherwise we have to assume they alias.
24552   return true;
24553 }
24554 
24555 /// Walk up chain skipping non-aliasing memory nodes,
24556 /// looking for aliasing nodes and adding them to the Aliases vector.
24557 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
24558                                    SmallVectorImpl<SDValue> &Aliases) {
24559   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
24560   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
24561 
24562   // Get alias information for node.
24563   // TODO: relax aliasing for unordered atomics (see D66309)
24564   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
24565 
24566   // Starting off.
24567   Chains.push_back(OriginalChain);
24568   unsigned Depth = 0;
24569 
24570   // Attempt to improve chain by a single step
24571   auto ImproveChain = [&](SDValue &C) -> bool {
24572     switch (C.getOpcode()) {
24573     case ISD::EntryToken:
24574       // No need to mark EntryToken.
24575       C = SDValue();
24576       return true;
24577     case ISD::LOAD:
24578     case ISD::STORE: {
24579       // Get alias information for C.
24580       // TODO: Relax aliasing for unordered atomics (see D66309)
24581       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
24582                       cast<LSBaseSDNode>(C.getNode())->isSimple();
24583       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
24584         // Look further up the chain.
24585         C = C.getOperand(0);
24586         return true;
24587       }
24588       // Alias, so stop here.
24589       return false;
24590     }
24591 
24592     case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
24594       C = C.getOperand(0);
24595       return true;
24596 
24597     case ISD::LIFETIME_START:
24598     case ISD::LIFETIME_END: {
24599       // We can forward past any lifetime start/end that can be proven not to
24600       // alias the memory access.
24601       if (!mayAlias(N, C.getNode())) {
24602         // Look further up the chain.
24603         C = C.getOperand(0);
24604         return true;
24605       }
24606       return false;
24607     }
24608     default:
24609       return false;
24610     }
24611   };
24612 
24613   // Look at each chain and determine if it is an alias.  If so, add it to the
24614   // aliases list.  If not, then continue up the chain looking for the next
24615   // candidate.
24616   while (!Chains.empty()) {
24617     SDValue Chain = Chains.pop_back_val();
24618 
24619     // Don't bother if we've seen Chain before.
24620     if (!Visited.insert(Chain.getNode()).second)
24621       continue;
24622 
24623     // For TokenFactor nodes, look at each operand and only continue up the
24624     // chain until we reach the depth limit.
24625     //
24626     // FIXME: The depth check could be made to return the last non-aliasing
24627     // chain we found before we hit a tokenfactor rather than the original
24628     // chain.
24629     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
24630       Aliases.clear();
24631       Aliases.push_back(OriginalChain);
24632       return;
24633     }
24634 
24635     if (Chain.getOpcode() == ISD::TokenFactor) {
24636       // We have to check each of the operands of the token factor for "small"
24637       // token factors, so we queue them up.  Adding the operands to the queue
24638       // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE).
24640       if (Chain.getNumOperands() > 16) {
24641         Aliases.push_back(Chain);
24642         continue;
24643       }
24644       for (unsigned n = Chain.getNumOperands(); n;)
24645         Chains.push_back(Chain.getOperand(--n));
24646       ++Depth;
24647       continue;
24648     }
24649     // Everything else
24650     if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
24652       if (Chain.getNode())
24653         Chains.push_back(Chain);
24654       ++Depth;
24655       continue;
24656     }
    // No improved chain possible; treat it as an alias.
24658     Aliases.push_back(Chain);
24659   }
24660 }
24661 
24662 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
24663 /// (aliasing node.)
24664 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
24665   if (OptLevel == CodeGenOpt::None)
24666     return OldChain;
24667 
24668   // Ops for replacing token factor.
24669   SmallVector<SDValue, 8> Aliases;
24670 
24671   // Accumulate all the aliases to this node.
24672   GatherAllAliases(N, OldChain, Aliases);
24673 
24674   // If no operands then chain to entry token.
  if (Aliases.empty())
24676     return DAG.getEntryNode();
24677 
24678   // If a single operand then chain to it.  We don't need to revisit it.
24679   if (Aliases.size() == 1)
24680     return Aliases[0];
24681 
24682   // Construct a custom tailored token factor.
24683   return DAG.getTokenFactor(SDLoc(N), Aliases);
24684 }
24685 
24686 namespace {
// TODO: Replace with std::monostate when we move to C++17.
24688 struct UnitT { } Unit;
24689 bool operator==(const UnitT &, const UnitT &) { return true; }
24690 bool operator!=(const UnitT &, const UnitT &) { return false; }
24691 } // namespace
24692 
24693 // This function tries to collect a bunch of potentially interesting
24694 // nodes to improve the chains of, all at once. This might seem
24695 // redundant, as this function gets called when visiting every store
24696 // node, so why not let the work be done on each store as it's visited?
24697 //
24698 // I believe this is mainly important because mergeConsecutiveStores
24699 // is unable to deal with merging stores of different sizes, so unless
24700 // we improve the chains of all the potential candidates up-front
24701 // before running mergeConsecutiveStores, it might only see some of
24702 // the nodes that will eventually be candidates, and then not be able
24703 // to go from a partially-merged state to the desired final
24704 // fully-merged state.
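//
// For example, if two of four adjacent i8 stores are first merged into an i16
// store, the remaining i8 pair can no longer combine with it into a single i32
// store; improving every chain up-front lets the merge logic see all four
// candidates at once.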
24705 
24706 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
24707   SmallVector<StoreSDNode *, 8> ChainedStores;
24708   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes adjacent to the previous one, and
  // its interval is thus merged with the previous interval at insertion
  // time.
24712 
24713   using IMap =
24714       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
24715   IMap::Allocator A;
24716   IMap Intervals(A);
24717 
24718   // This holds the base pointer, index, and the offset in bytes from the base
24719   // pointer.
24720   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24721 
24722   // We must have a base and an offset.
24723   if (!BasePtr.getBase().getNode())
24724     return false;
24725 
24726   // Do not handle stores to undef base pointers.
24727   if (BasePtr.getBase().isUndef())
24728     return false;
24729 
  // Do not handle stores to opaque types.
24731   if (St->getMemoryVT().isZeroSized())
24732     return false;
24733 
24734   // BaseIndexOffset assumes that offsets are fixed-size, which
24735   // is not valid for scalable vectors where the offsets are
24736   // scaled by `vscale`, so bail out early.
24737   if (St->getMemoryVT().isScalableVector())
24738     return false;
24739 
24740   // Add ST's interval.
24741   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
24742 
24743   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
24744     if (Chain->getMemoryVT().isScalableVector())
24745       return false;
24746 
24747     // If the chain has more than one use, then we can't reorder the mem ops.
24748     if (!SDValue(Chain, 0)->hasOneUse())
24749       break;
24750     // TODO: Relax for unordered atomics (see D66309)
24751     if (!Chain->isSimple() || Chain->isIndexed())
24752       break;
24753 
24754     // Find the base pointer and offset for this memory node.
24755     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
24756     // Check that the base pointer is the same as the original one.
24757     int64_t Offset;
24758     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
24759       break;
24760     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
24761     // Make sure we don't overlap with other intervals by checking the ones to
24762     // the left or right before inserting.
24763     auto I = Intervals.find(Offset);
24764     // If there's a next interval, we should end before it.
24765     if (I != Intervals.end() && I.start() < (Offset + Length))
24766       break;
24767     // If there's a previous interval, we should start after it.
24768     if (I != Intervals.begin() && (--I).stop() <= Offset)
24769       break;
24770     Intervals.insert(Offset, Offset + Length, Unit);
24771 
24772     ChainedStores.push_back(Chain);
24773     STChain = Chain;
24774   }
24775 
24776   // If we didn't find a chained store, exit.
  if (ChainedStores.empty())
24778     return false;
24779 
24780   // Improve all chained stores (St and ChainedStores members) starting from
24781   // where the store chain ended and return single TokenFactor.
24782   SDValue NewChain = STChain->getChain();
24783   SmallVector<SDValue, 8> TFOps;
24784   for (unsigned I = ChainedStores.size(); I;) {
24785     StoreSDNode *S = ChainedStores[--I];
24786     SDValue BetterChain = FindBetterChain(S, NewChain);
24787     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
24788         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
24789     TFOps.push_back(SDValue(S, 0));
24790     ChainedStores[I] = S;
24791   }
24792 
24793   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
24794   SDValue BetterChain = FindBetterChain(St, NewChain);
24795   SDValue NewST;
24796   if (St->isTruncatingStore())
24797     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
24798                               St->getBasePtr(), St->getMemoryVT(),
24799                               St->getMemOperand());
24800   else
24801     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
24802                          St->getBasePtr(), St->getMemOperand());
24803 
24804   TFOps.push_back(NewST);
24805 
24806   // If we improved every element of TFOps, then we've lost the dependence on
24807   // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChain.
24809   auto hasImprovedChain = [&](SDValue ST) -> bool {
24810     return ST->getOperand(0) != NewChain;
24811   };
24812   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
24813   if (AddNewChain)
24814     TFOps.insert(TFOps.begin(), NewChain);
24815 
24816   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
24817   CombineTo(St, TF);
24818 
24819   // Add TF and its operands to the worklist.
24820   AddToWorklist(TF.getNode());
24821   for (const SDValue &Op : TF->ops())
24822     AddToWorklist(Op.getNode());
24823   AddToWorklist(STChain);
24824   return true;
24825 }
24826 
24827 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
24828   if (OptLevel == CodeGenOpt::None)
24829     return false;
24830 
24831   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
24832 
24833   // We must have a base and an offset.
24834   if (!BasePtr.getBase().getNode())
24835     return false;
24836 
24837   // Do not handle stores to undef base pointers.
24838   if (BasePtr.getBase().isUndef())
24839     return false;
24840 
24841   // Directly improve a chain of disjoint stores starting at St.
24842   if (parallelizeChainedStores(St))
24843     return true;
24844 
  // Improve St's chain.
24846   SDValue BetterChain = FindBetterChain(St, St->getChain());
24847   if (St->getChain() != BetterChain) {
24848     replaceStoreChain(St, BetterChain);
24849     return true;
24850   }
24851   return false;
24852 }
24853 
/// This is the entry point for the file: create a DAGCombiner and run it over
/// the DAG at the requested combine level.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
24858   DAGCombiner(*this, AA, OptLevel).Run(Level);
24859 }
24860