//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines DAG nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of DAG nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of times the same StoreNode and RootNode "
             "pair may be considered for token factor inlining"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times the same StoreNode and RootNode "
             "pair may bail out of the store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequences"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

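// Note: the combiner options above are ordinary cl::opt flags, so they can be
// toggled on the llc command line when experimenting, e.g. (illustrative):
//   llc -combiner-store-merging=false -combiner-tokenfactor-inline-limit=1024
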
namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// last considered a new worklist entry. Because we do not add duplicate
    /// nodes to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count tracks how many times we have seen the StoreNode with the
    /// same RootNode bail out of the dependence check. Once the same pair has
    /// bailed out more times than the limit, we stop considering the StoreNode
    /// with that RootNode as a store merging candidate.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the worklist because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()),
          Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
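    /// (For example, on the i32 value (and x, 255) all 32 result bits are
    /// demanded; the TLI analysis may still prove that only the low 8 bits of
    /// x are relevant and simplify x.)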
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement DAG node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
                                    SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                              SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL, SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walks up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
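    // For example, a stored (Constant 42) or (ConstantFP 1.0) classifies as
    // Constant, (extract_vector_elt v, i) as Extract, and a direct (load p)
    // as Load; anything else is Unknown.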
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use); if any are not met, an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
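    /// (e.g. MatchRotate queries hasOperation(ISD::ROTL, VT) before deciding
    /// to form a rotate node.)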
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the DAG combiner on all nodes in the worklist.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount; before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
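// Worked example: with an 8-bit LHS, a 16-bit RHS, and Offset = 1, both APInts
// are zero-extended to 1 + max(8, 16) = 17 bits.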
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
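// (For example, on a target whose boolean true value is all-ones,
// (select_cc lhs, rhs, -1, 0, cc) is handled like (setcc lhs, rhs, cc).)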
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

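// Illustrative: a v8i16 splat of 0xFFFF matches ScalarTy == MVT::i16 below.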
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
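// For example, an i32 (build_vector 1, 2, undef, 4) qualifies, while a
// build_vector whose constant elements do not match the vector's scalar width
// is rejected.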
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine whether this is an indexed load whose index can be split off,
// i.e. whose index operand is not an opaque target constant.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).
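  // For example, if x + offset2 already fails to be a legal addressing mode
  // for some memory use, reassociating changes nothing; but if x + offset2 is
  // legal while x + (offset1 + offset2) is not, we return true so the adds
  // stay split and the small offset can still fold into the load or store.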

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto *LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
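// For example, (add (add x, c1), c2) becomes (add x, (add c1, c2)), and
// (add (add x, c1), y) becomes (add (add x, y), c1) when the inner add has
// exactly one use.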
1057 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1058                                                SDValue N0, SDValue N1) {
1059   EVT VT = N0.getValueType();
1060 
1061   if (N0.getOpcode() != Opc)
1062     return SDValue();
1063 
1064   if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1065     if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1066       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1067       if (SDValue OpNode =
1068               DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
1069         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1070       return SDValue();
1071     }
1072     if (N0.hasOneUse()) {
1073       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1074       //              iff (op x, c1) has one use
1075       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1076       if (!OpNode.getNode())
1077         return SDValue();
1078       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1079     }
1080   }
1081   return SDValue();
1082 }
1083 
1084 // Try to reassociate commutative binops.
1085 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1086                                     SDValue N1, SDNodeFlags Flags) {
1087   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1088 
1089   // Floating-point reassociation is not allowed without loose FP math.
1090   if (N0.getValueType().isFloatingPoint() ||
1091       N1.getValueType().isFloatingPoint())
1092     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1093       return SDValue();
1094 
1095   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1096     return Combined;
1097   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1098     return Combined;
1099   return SDValue();
1100 }
1101 
1102 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1103                                bool AddTo) {
1104   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1105   ++NodesCombined;
1106   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1107              To[0].getNode()->dump(&DAG);
1108              dbgs() << " and " << NumTo - 1 << " other values\n");
1109   for (unsigned i = 0, e = NumTo; i != e; ++i)
1110     assert((!To[i].getNode() ||
1111             N->getValueType(i) == To[i].getValueType()) &&
1112            "Cannot combine value to value of different type!");
1113 
1114   WorklistRemover DeadNodes(*this);
1115   DAG.ReplaceAllUsesWith(N, To);
1116   if (AddTo) {
1117     // Push the new nodes and any users onto the worklist
1118     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1119       if (To[i].getNode()) {
1120         AddToWorklist(To[i].getNode());
1121         AddUsersToWorklist(To[i].getNode());
1122       }
1123     }
1124   }
1125 
1126   // Finally, if the node is now dead, remove it from the graph.  The node
1127   // may not be dead if the replacement process recursively simplified to
1128   // something else needing this node.
1129   if (N->use_empty())
1130     deleteAndRecombine(N);
1131   return SDValue(N, 0);
1132 }
1133 
1134 void DAGCombiner::
1135 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1136   // Replace the old value with the new one.
1137   ++NodesCombined;
1138   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1139              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1140              dbgs() << '\n');
1141 
1142   // Replace all uses.  If any nodes become isomorphic to other nodes and
1143   // are deleted, make sure to remove them from our worklist.
1144   WorklistRemover DeadNodes(*this);
1145   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1146 
1147   // Push the new node and any (possibly new) users onto the worklist.
1148   AddToWorklistWithUsers(TLO.New.getNode());
1149 
1150   // Finally, if the node is now dead, remove it from the graph.  The node
1151   // may not be dead if the replacement process recursively simplified to
1152   // something else needing this node.
1153   if (TLO.Old.getNode()->use_empty())
1154     deleteAndRecombine(TLO.Old.getNode());
1155 }
1156 
1157 /// Check the specified integer node value to see if it can be simplified or if
1158 /// things it uses can be simplified by bit propagation. If so, return true.
1159 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1160                                        const APInt &DemandedElts,
1161                                        bool AssumeSingleUse) {
1162   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1163   KnownBits Known;
1164   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1165                                 AssumeSingleUse))
1166     return false;
1167 
1168   // Revisit the node.
1169   AddToWorklist(Op.getNode());
1170 
1171   CommitTargetLoweringOpt(TLO);
1172   return true;
1173 }
1174 
1175 /// Check the specified vector node value to see if it can be simplified or
1176 /// if things it uses can be simplified as it only uses some of the elements.
1177 /// If so, return true.
1178 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1179                                              const APInt &DemandedElts,
1180                                              bool AssumeSingleUse) {
1181   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1182   APInt KnownUndef, KnownZero;
1183   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1184                                       TLO, 0, AssumeSingleUse))
1185     return false;
1186 
1187   // Revisit the node.
1188   AddToWorklist(Op.getNode());
1189 
1190   CommitTargetLoweringOpt(TLO);
1191   return true;
1192 }
1193 
1194 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1195   SDLoc DL(Load);
1196   EVT VT = Load->getValueType(0);
1197   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1198 
1199   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1200              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1201   WorklistRemover DeadNodes(*this);
1202   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1203   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1204   deleteAndRecombine(Load);
1205   AddToWorklist(Trunc.getNode());
1206 }
1207 
1208 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1209   Replace = false;
1210   SDLoc DL(Op);
1211   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1212     LoadSDNode *LD = cast<LoadSDNode>(Op);
1213     EVT MemVT = LD->getMemoryVT();
1214     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1215                                                       : LD->getExtensionType();
1216     Replace = true;
1217     return DAG.getExtLoad(ExtType, DL, PVT,
1218                           LD->getChain(), LD->getBasePtr(),
1219                           MemVT, LD->getMemOperand());
1220   }
1221 
1222   unsigned Opc = Op.getOpcode();
1223   switch (Opc) {
1224   default: break;
1225   case ISD::AssertSext:
1226     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1227       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1228     break;
1229   case ISD::AssertZext:
1230     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1231       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1232     break;
1233   case ISD::Constant: {
1234     unsigned ExtOpc =
1235       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1236     return DAG.getNode(ExtOpc, DL, PVT, Op);
1237   }
1238   }
1239 
1240   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1241     return SDValue();
1242   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1243 }
1244 
1245 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1246   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1247     return SDValue();
1248   EVT OldVT = Op.getValueType();
1249   SDLoc DL(Op);
1250   bool Replace = false;
1251   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1252   if (!NewOp.getNode())
1253     return SDValue();
1254   AddToWorklist(NewOp.getNode());
1255 
1256   if (Replace)
1257     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1258   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1259                      DAG.getValueType(OldVT));
1260 }
1261 
1262 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1263   EVT OldVT = Op.getValueType();
1264   SDLoc DL(Op);
1265   bool Replace = false;
1266   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1267   if (!NewOp.getNode())
1268     return SDValue();
1269   AddToWorklist(NewOp.getNode());
1270 
1271   if (Replace)
1272     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1273   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1274 }
1275 
/// Promote the specified integer binary operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32, since i16 instructions are longer (they need an operand-size
/// prefix).
1279 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1280   if (!LegalOperations)
1281     return SDValue();
1282 
1283   EVT VT = Op.getValueType();
1284   if (VT.isVector() || !VT.isInteger())
1285     return SDValue();
1286 
1287   // If operation type is 'undesirable', e.g. i16 on x86, consider
1288   // promoting it.
1289   unsigned Opc = Op.getOpcode();
1290   if (TLI.isTypeDesirableForOp(Opc, VT))
1291     return SDValue();
1292 
1293   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what the right type is to promote it to.
1296   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297     assert(PVT != VT && "Don't know what type to promote to!");
1298 
1299     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1300 
1301     bool Replace0 = false;
1302     SDValue N0 = Op.getOperand(0);
1303     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1304 
1305     bool Replace1 = false;
1306     SDValue N1 = Op.getOperand(1);
1307     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1308     SDLoc DL(Op);
1309 
1310     SDValue RV =
1311         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1312 
1313     // We are always replacing N0/N1's use in N and only need additional
1314     // replacements if there are additional uses.
1315     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1316     //       (SDValue) here because the node may reference multiple values
1317     //       (for example, the chain value of a load node).
1318     Replace0 &= !N0->hasOneUse();
1319     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1320 
1321     // Combine Op here so it is preserved past replacements.
1322     CombineTo(Op.getNode(), RV);
1323 
1324     // If operands have a use ordering, make sure we deal with
1325     // predecessor first.
1326     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1327       std::swap(N0, N1);
1328       std::swap(NN0, NN1);
1329     }
1330 
1331     if (Replace0) {
1332       AddToWorklist(NN0.getNode());
1333       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1334     }
1335     if (Replace1) {
1336       AddToWorklist(NN1.getNode());
1337       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1338     }
1339     return Op;
1340   }
1341   return SDValue();
1342 }
1343 
/// Promote the specified integer shift operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32, since i16 instructions are longer (they need an operand-size
/// prefix).
1347 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1348   if (!LegalOperations)
1349     return SDValue();
1350 
1351   EVT VT = Op.getValueType();
1352   if (VT.isVector() || !VT.isInteger())
1353     return SDValue();
1354 
1355   // If operation type is 'undesirable', e.g. i16 on x86, consider
1356   // promoting it.
1357   unsigned Opc = Op.getOpcode();
1358   if (TLI.isTypeDesirableForOp(Opc, VT))
1359     return SDValue();
1360 
1361   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what the right type is to promote it to.
1364   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1365     assert(PVT != VT && "Don't know what type to promote to!");
1366 
1367     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1368 
1369     bool Replace = false;
1370     SDValue N0 = Op.getOperand(0);
1371     SDValue N1 = Op.getOperand(1);
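    // Only the shifted value is promoted; the shift amount is reused as-is.
    // For SRA the promoted high bits must be sign bits and for SRL they must
    // be zero, so the matching in-register extension is required; SHL does
    // not care about the incoming high bits, so any promotion suffices.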
1372     if (Opc == ISD::SRA)
1373       N0 = SExtPromoteOperand(N0, PVT);
1374     else if (Opc == ISD::SRL)
1375       N0 = ZExtPromoteOperand(N0, PVT);
1376     else
1377       N0 = PromoteOperand(N0, PVT, Replace);
1378 
1379     if (!N0.getNode())
1380       return SDValue();
1381 
1382     SDLoc DL(Op);
1383     SDValue RV =
1384         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1385 
1386     if (Replace)
1387       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1388 
    // Op may have been deleted (e.g. CSE'd away) while replacing the load's
    // uses above; only return the new node if Op is still alive.
1390     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1391       return RV;
1392   }
1393   return SDValue();
1394 }
1395 
1396 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1397   if (!LegalOperations)
1398     return SDValue();
1399 
1400   EVT VT = Op.getValueType();
1401   if (VT.isVector() || !VT.isInteger())
1402     return SDValue();
1403 
1404   // If operation type is 'undesirable', e.g. i16 on x86, consider
1405   // promoting it.
1406   unsigned Opc = Op.getOpcode();
1407   if (TLI.isTypeDesirableForOp(Opc, VT))
1408     return SDValue();
1409 
1410   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what the right type is to promote it to.
1413   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1414     assert(PVT != VT && "Don't know what type to promote to!");
1415     // fold (aext (aext x)) -> (aext x)
1416     // fold (aext (zext x)) -> (zext x)
1417     // fold (aext (sext x)) -> (sext x)
1418     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1419     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1420   }
1421   return SDValue();
1422 }
1423 
1424 bool DAGCombiner::PromoteLoad(SDValue Op) {
1425   if (!LegalOperations)
1426     return false;
1427 
1428   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1429     return false;
1430 
1431   EVT VT = Op.getValueType();
1432   if (VT.isVector() || !VT.isInteger())
1433     return false;
1434 
1435   // If operation type is 'undesirable', e.g. i16 on x86, consider
1436   // promoting it.
1437   unsigned Opc = Op.getOpcode();
1438   if (TLI.isTypeDesirableForOp(Opc, VT))
1439     return false;
1440 
1441   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what the right type is to promote it to.
1444   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1445     assert(PVT != VT && "Don't know what type to promote to!");
1446 
1447     SDLoc DL(Op);
1448     SDNode *N = Op.getNode();
1449     LoadSDNode *LD = cast<LoadSDNode>(N);
1450     EVT MemVT = LD->getMemoryVT();
1451     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1452                                                       : LD->getExtensionType();
1453     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1454                                    LD->getChain(), LD->getBasePtr(),
1455                                    MemVT, LD->getMemOperand());
1456     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1457 
1458     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1459                Result.getNode()->dump(&DAG); dbgs() << '\n');
1460     WorklistRemover DeadNodes(*this);
1461     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1462     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1463     deleteAndRecombine(N);
1464     AddToWorklist(Result.getNode());
1465     return true;
1466   }
1467   return false;
1468 }
1469 
1470 /// Recursively delete a node which has no uses and any operands for
1471 /// which it is the only use.
1472 ///
1473 /// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1476 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1477   if (!N->use_empty())
1478     return false;
1479 
1480   SmallSetVector<SDNode *, 16> Nodes;
1481   Nodes.insert(N);
1482   do {
1483     N = Nodes.pop_back_val();
1484     if (!N)
1485       continue;
1486 
1487     if (N->use_empty()) {
1488       for (const SDValue &ChildN : N->op_values())
1489         Nodes.insert(ChildN.getNode());
1490 
1491       removeFromWorklist(N);
1492       DAG.DeleteNode(N);
1493     } else {
1494       AddToWorklist(N);
1495     }
1496   } while (!Nodes.empty());
1497   return true;
1498 }
1499 
1500 //===----------------------------------------------------------------------===//
1501 //  Main DAG Combiner implementation
1502 //===----------------------------------------------------------------------===//
1503 
1504 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
1506   Level = AtLevel;
1507   LegalDAG = Level >= AfterLegalizeDAG;
1508   LegalOperations = Level >= AfterLegalizeVectorOps;
1509   LegalTypes = Level >= AfterLegalizeTypes;
1510 
1511   WorklistInserter AddNodes(*this);
1512 
1513   // Add all the dag nodes to the worklist.
1514   for (SDNode &Node : DAG.allnodes())
1515     AddToWorklist(&Node);
1516 
  // Create a dummy node (which is not added to allnodes) that holds a
  // reference to the root node, preventing it from being deleted and tracking
  // any changes to the root.
1520   HandleSDNode Dummy(DAG.getRoot());
1521 
1522   // While we have a valid worklist entry node, try to combine it.
1523   while (SDNode *N = getNextWorklistEntry()) {
1524     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1525     // N is deleted from the DAG, since they too may now be dead or may have a
1526     // reduced number of uses, allowing other xforms.
1527     if (recursivelyDeleteUnusedNodes(N))
1528       continue;
1529 
1530     WorklistRemover DeadNodes(*this);
1531 
1532     // If this combine is running after legalizing the DAG, re-legalize any
1533     // nodes pulled off the worklist.
1534     if (LegalDAG) {
1535       SmallSetVector<SDNode *, 16> UpdatedNodes;
1536       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1537 
1538       for (SDNode *LN : UpdatedNodes)
1539         AddToWorklistWithUsers(LN);
1540 
1541       if (!NIsValid)
1542         continue;
1543     }
1544 
1545     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1546 
1547     // Add any operands of the new node which have not yet been combined to the
1548     // worklist as well. Because the worklist uniques things already, this
1549     // won't repeatedly process the same operand.
1550     CombinedNodes.insert(N);
1551     for (const SDValue &ChildN : N->op_values())
1552       if (!CombinedNodes.count(ChildN.getNode()))
1553         AddToWorklist(ChildN.getNode());
1554 
1555     SDValue RV = combine(N);
1556 
1557     if (!RV.getNode())
1558       continue;
1559 
1560     ++NodesCombined;
1561 
1562     // If we get back the same node we passed in, rather than a new node or
1563     // zero, we know that the node must have defined multiple values and
1564     // CombineTo was used.  Since CombineTo takes care of the worklist
1565     // mechanics for us, we have no work to do in this case.
1566     if (RV.getNode() == N)
1567       continue;
1568 
1569     assert(N->getOpcode() != ISD::DELETED_NODE &&
1570            RV.getOpcode() != ISD::DELETED_NODE &&
1571            "Node was deleted but visit returned new node!");
1572 
1573     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1574 
1575     if (N->getNumValues() == RV.getNode()->getNumValues())
1576       DAG.ReplaceAllUsesWith(N, RV.getNode());
1577     else {
1578       assert(N->getValueType(0) == RV.getValueType() &&
1579              N->getNumValues() == 1 && "Type mismatch");
1580       DAG.ReplaceAllUsesWith(N, &RV);
1581     }
1582 
1583     // Push the new node and any users onto the worklist.  Omit this if the
1584     // new node is the EntryToken (e.g. if a store managed to get optimized
1585     // out), because re-visiting the EntryToken and its users will not uncover
1586     // any additional opportunities, but there may be a large number of such
1587     // users, potentially causing compile time explosion.
1588     if (RV.getOpcode() != ISD::EntryToken) {
1589       AddToWorklist(RV.getNode());
1590       AddUsersToWorklist(RV.getNode());
1591     }
1592 
1593     // Finally, if the node is now dead, remove it from the graph.  The node
1594     // may not be dead if the replacement process recursively simplified to
1595     // something else needing this node. This will also take care of adding any
1596     // operands which have lost a user to the worklist.
1597     recursivelyDeleteUnusedNodes(N);
1598   }
1599 
  // If the root changed (e.g. it was a dead load), update the root.
1601   DAG.setRoot(Dummy.getValue());
1602   DAG.RemoveDeadNodes();
1603 }
1604 
1605 SDValue DAGCombiner::visit(SDNode *N) {
1606   switch (N->getOpcode()) {
1607   default: break;
1608   case ISD::TokenFactor:        return visitTokenFactor(N);
1609   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1610   case ISD::ADD:                return visitADD(N);
1611   case ISD::SUB:                return visitSUB(N);
1612   case ISD::SADDSAT:
1613   case ISD::UADDSAT:            return visitADDSAT(N);
1614   case ISD::SSUBSAT:
1615   case ISD::USUBSAT:            return visitSUBSAT(N);
1616   case ISD::ADDC:               return visitADDC(N);
1617   case ISD::SADDO:
1618   case ISD::UADDO:              return visitADDO(N);
1619   case ISD::SUBC:               return visitSUBC(N);
1620   case ISD::SSUBO:
1621   case ISD::USUBO:              return visitSUBO(N);
1622   case ISD::ADDE:               return visitADDE(N);
1623   case ISD::ADDCARRY:           return visitADDCARRY(N);
1624   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1625   case ISD::SUBE:               return visitSUBE(N);
1626   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1627   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1628   case ISD::SMULFIX:
1629   case ISD::SMULFIXSAT:
1630   case ISD::UMULFIX:
1631   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1632   case ISD::MUL:                return visitMUL(N);
1633   case ISD::SDIV:               return visitSDIV(N);
1634   case ISD::UDIV:               return visitUDIV(N);
1635   case ISD::SREM:
1636   case ISD::UREM:               return visitREM(N);
1637   case ISD::MULHU:              return visitMULHU(N);
1638   case ISD::MULHS:              return visitMULHS(N);
1639   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1640   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1641   case ISD::SMULO:
1642   case ISD::UMULO:              return visitMULO(N);
1643   case ISD::SMIN:
1644   case ISD::SMAX:
1645   case ISD::UMIN:
1646   case ISD::UMAX:               return visitIMINMAX(N);
1647   case ISD::AND:                return visitAND(N);
1648   case ISD::OR:                 return visitOR(N);
1649   case ISD::XOR:                return visitXOR(N);
1650   case ISD::SHL:                return visitSHL(N);
1651   case ISD::SRA:                return visitSRA(N);
1652   case ISD::SRL:                return visitSRL(N);
1653   case ISD::ROTR:
1654   case ISD::ROTL:               return visitRotate(N);
1655   case ISD::FSHL:
1656   case ISD::FSHR:               return visitFunnelShift(N);
1657   case ISD::ABS:                return visitABS(N);
1658   case ISD::BSWAP:              return visitBSWAP(N);
1659   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1660   case ISD::CTLZ:               return visitCTLZ(N);
1661   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1662   case ISD::CTTZ:               return visitCTTZ(N);
1663   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1664   case ISD::CTPOP:              return visitCTPOP(N);
1665   case ISD::SELECT:             return visitSELECT(N);
1666   case ISD::VSELECT:            return visitVSELECT(N);
1667   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1668   case ISD::SETCC:              return visitSETCC(N);
1669   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1670   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1671   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1672   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1673   case ISD::AssertSext:
1674   case ISD::AssertZext:         return visitAssertExt(N);
1675   case ISD::AssertAlign:        return visitAssertAlign(N);
1676   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1677   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1678   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1679   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1680   case ISD::BITCAST:            return visitBITCAST(N);
1681   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1682   case ISD::FADD:               return visitFADD(N);
1683   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1684   case ISD::FSUB:               return visitFSUB(N);
1685   case ISD::FMUL:               return visitFMUL(N);
1686   case ISD::FMA:                return visitFMA(N);
1687   case ISD::FDIV:               return visitFDIV(N);
1688   case ISD::FREM:               return visitFREM(N);
1689   case ISD::FSQRT:              return visitFSQRT(N);
1690   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1691   case ISD::FPOW:               return visitFPOW(N);
1692   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1693   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1694   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1695   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1696   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1697   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1698   case ISD::FNEG:               return visitFNEG(N);
1699   case ISD::FABS:               return visitFABS(N);
1700   case ISD::FFLOOR:             return visitFFLOOR(N);
1701   case ISD::FMINNUM:            return visitFMINNUM(N);
1702   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1703   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1704   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1705   case ISD::FCEIL:              return visitFCEIL(N);
1706   case ISD::FTRUNC:             return visitFTRUNC(N);
1707   case ISD::BRCOND:             return visitBRCOND(N);
1708   case ISD::BR_CC:              return visitBR_CC(N);
1709   case ISD::LOAD:               return visitLOAD(N);
1710   case ISD::STORE:              return visitSTORE(N);
1711   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1712   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1713   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1714   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1715   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1716   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1717   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1718   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1719   case ISD::MGATHER:            return visitMGATHER(N);
1720   case ISD::MLOAD:              return visitMLOAD(N);
1721   case ISD::MSCATTER:           return visitMSCATTER(N);
1722   case ISD::MSTORE:             return visitMSTORE(N);
1723   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1724   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1725   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1726   case ISD::FREEZE:             return visitFREEZE(N);
1727   case ISD::VECREDUCE_FADD:
1728   case ISD::VECREDUCE_FMUL:
1729   case ISD::VECREDUCE_ADD:
1730   case ISD::VECREDUCE_MUL:
1731   case ISD::VECREDUCE_AND:
1732   case ISD::VECREDUCE_OR:
1733   case ISD::VECREDUCE_XOR:
1734   case ISD::VECREDUCE_SMAX:
1735   case ISD::VECREDUCE_SMIN:
1736   case ISD::VECREDUCE_UMAX:
1737   case ISD::VECREDUCE_UMIN:
1738   case ISD::VECREDUCE_FMAX:
1739   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1740   }
1741   return SDValue();
1742 }
1743 
1744 SDValue DAGCombiner::combine(SDNode *N) {
1745   SDValue RV;
1746   if (!DisableGenericCombines)
1747     RV = visit(N);
1748 
1749   // If nothing happened, try a target-specific DAG combine.
1750   if (!RV.getNode()) {
1751     assert(N->getOpcode() != ISD::DELETED_NODE &&
1752            "Node was deleted but visit returned NULL!");
1753 
1754     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1755         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1756 
1757       // Expose the DAG combiner to the target combiner impls.
1758       TargetLowering::DAGCombinerInfo
1759         DagCombineInfo(DAG, Level, false, this);
1760 
1761       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1762     }
1763   }
1764 
  // If still nothing happened, try promoting the operation.
1766   if (!RV.getNode()) {
1767     switch (N->getOpcode()) {
1768     default: break;
1769     case ISD::ADD:
1770     case ISD::SUB:
1771     case ISD::MUL:
1772     case ISD::AND:
1773     case ISD::OR:
1774     case ISD::XOR:
1775       RV = PromoteIntBinOp(SDValue(N, 0));
1776       break;
1777     case ISD::SHL:
1778     case ISD::SRA:
1779     case ISD::SRL:
1780       RV = PromoteIntShiftOp(SDValue(N, 0));
1781       break;
1782     case ISD::SIGN_EXTEND:
1783     case ISD::ZERO_EXTEND:
1784     case ISD::ANY_EXTEND:
1785       RV = PromoteExtend(SDValue(N, 0));
1786       break;
1787     case ISD::LOAD:
1788       if (PromoteLoad(SDValue(N, 0)))
1789         RV = SDValue(N, 0);
1790       break;
1791     }
1792   }
1793 
1794   // If N is a commutative binary node, try to eliminate it if the commuted
1795   // version is already present in the DAG.
1796   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1797       N->getNumValues() == 1) {
1798     SDValue N0 = N->getOperand(0);
1799     SDValue N1 = N->getOperand(1);
1800 
1801     // Constant operands are canonicalized to RHS.
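    // Only attempt the swap when it could match an existing node: skip if the
    // operands are identical (swapping is a no-op), and skip if only N1 is a
    // constant, since the commuted form would be non-canonical and should not
    // already exist in the DAG.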
1802     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1803       SDValue Ops[] = {N1, N0};
1804       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1805                                             N->getFlags());
1806       if (CSENode)
1807         return SDValue(CSENode, 0);
1808     }
1809   }
1810 
1811   return RV;
1812 }
1813 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
1816 static SDValue getInputChainForNode(SDNode *N) {
1817   if (unsigned NumOps = N->getNumOperands()) {
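    // The chain is conventionally the first or last operand; check those two
    // positions first before scanning the remaining operands.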
1818     if (N->getOperand(0).getValueType() == MVT::Other)
1819       return N->getOperand(0);
1820     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1821       return N->getOperand(NumOps-1);
1822     for (unsigned i = 1; i < NumOps-1; ++i)
1823       if (N->getOperand(i).getValueType() == MVT::Other)
1824         return N->getOperand(i);
1825   }
1826   return SDValue();
1827 }
1828 
1829 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1830   // If N has two operands, where one has an input chain equal to the other,
1831   // the 'other' chain is redundant.
1832   if (N->getNumOperands() == 2) {
1833     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1834       return N->getOperand(0);
1835     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1836       return N->getOperand(1);
1837   }
1838 
1839   // Don't simplify token factors if optnone.
1840   if (OptLevel == CodeGenOpt::None)
1841     return SDValue();
1842 
1843   // Don't simplify the token factor if the node itself has too many operands.
1844   if (N->getNumOperands() > TokenFactorInlineLimit)
1845     return SDValue();
1846 
1847   // If the sole user is a token factor, we should make sure we have a
1848   // chance to merge them together. This prevents TF chains from inhibiting
1849   // optimizations.
1850   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1851     AddToWorklist(*(N->use_begin()));
1852 
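  // Flatten nested Token Factors into a single node, e.g.
  //   TokenFactor(TokenFactor(A, B), C) --> TokenFactor(A, B, C)
  // dropping redundant operands along the way.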
1853   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1854   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1855   SmallPtrSet<SDNode*, 16> SeenOps;
1856   bool Changed = false;             // If we should replace this token factor.
1857 
1858   // Start out with this token factor.
1859   TFs.push_back(N);
1860 
  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
1863   for (unsigned i = 0; i < TFs.size(); ++i) {
1864     // Limit number of nodes to inline, to avoid quadratic compile times.
1865     // We have to add the outstanding Token Factors to Ops, otherwise we might
1866     // drop Ops from the resulting Token Factors.
1867     if (Ops.size() > TokenFactorInlineLimit) {
1868       for (unsigned j = i; j < TFs.size(); j++)
1869         Ops.emplace_back(TFs[j], 0);
1870       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1871       // combiner worklist later.
1872       TFs.resize(i);
1873       break;
1874     }
1875 
1876     SDNode *TF = TFs[i];
1877     // Check each of the operands.
1878     for (const SDValue &Op : TF->op_values()) {
1879       switch (Op.getOpcode()) {
1880       case ISD::EntryToken:
1881         // Entry tokens don't need to be added to the list. They are
1882         // redundant.
1883         Changed = true;
1884         break;
1885 
1886       case ISD::TokenFactor:
1887         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1888           // Queue up for processing.
1889           TFs.push_back(Op.getNode());
1890           Changed = true;
1891           break;
1892         }
1893         LLVM_FALLTHROUGH;
1894 
1895       default:
1896         // Only add if it isn't already in the list.
1897         if (SeenOps.insert(Op.getNode()).second)
1898           Ops.push_back(Op);
1899         else
1900           Changed = true;
1901         break;
1902       }
1903     }
1904   }
1905 
  // Re-visit inlined Token Factors, to clean them up in case they have become
  // dead. Skip the first Token Factor, as this is the current node.
1908   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1909     AddToWorklist(TFs[i]);
1910 
  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.
1916 
1917   // List of nodes to search through and original Ops from which they originate.
1918   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1919   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1920   SmallPtrSet<SDNode *, 16> SeenChains;
1921   bool DidPruneOps = false;
1922 
1923   unsigned NumLeftToConsider = 0;
1924   for (const SDValue &Op : Ops) {
1925     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1926     OpWorkCount.push_back(1);
1927   }
1928 
1929   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If Op is one of the original Ops, we can remove it from the list.
    // Re-mark any search associated with it as coming from the current
    // OpNumber.
1932     if (SeenOps.contains(Op)) {
1933       Changed = true;
1934       DidPruneOps = true;
1935       unsigned OrigOpNumber = 0;
1936       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1937         OrigOpNumber++;
1938       assert((OrigOpNumber != Ops.size()) &&
1939              "expected to find TokenFactor Operand");
1940       // Re-mark worklist from OrigOpNumber to OpNumber
1941       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1942         if (Worklist[i].second == OrigOpNumber) {
1943           Worklist[i].second = OpNumber;
1944         }
1945       }
1946       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1947       OpWorkCount[OrigOpNumber] = 0;
1948       NumLeftToConsider--;
1949     }
1950     // Add if it's a new chain
1951     if (SeenChains.insert(Op).second) {
1952       OpWorkCount[OpNumber]++;
1953       Worklist.push_back(std::make_pair(Op, OpNumber));
1954     }
1955   };
1956 
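  // Walk the chain frontier breadth-first, capping the total number of
  // visited nodes at 1024 to keep the cost of this pruning bounded.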
1957   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
1959     if (NumLeftToConsider <= 1)
1960       break;
1961     auto CurNode = Worklist[i].first;
1962     auto CurOpNumber = Worklist[i].second;
1963     assert((OpWorkCount[CurOpNumber] > 0) &&
1964            "Node should not appear in worklist");
1965     switch (CurNode->getOpcode()) {
1966     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
1971       NumLeftToConsider++;
1972       break;
1973     case ISD::TokenFactor:
1974       for (const SDValue &Op : CurNode->op_values())
1975         AddToWorklist(i, Op.getNode(), CurOpNumber);
1976       break;
1977     case ISD::LIFETIME_START:
1978     case ISD::LIFETIME_END:
1979     case ISD::CopyFromReg:
1980     case ISD::CopyToReg:
1981       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1982       break;
1983     default:
1984       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1985         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1986       break;
1987     }
1988     OpWorkCount[CurOpNumber]--;
1989     if (OpWorkCount[CurOpNumber] == 0)
1990       NumLeftToConsider--;
1991   }
1992 
1993   // If we've changed things around then replace token factor.
1994   if (Changed) {
1995     SDValue Result;
1996     if (Ops.empty()) {
1997       // The entry token is the only possible outcome.
1998       Result = DAG.getEntryNode();
1999     } else {
2000       if (DidPruneOps) {
2001         SmallVector<SDValue, 8> PrunedOps;
2003         for (const SDValue &Op : Ops) {
2004           if (SeenChains.count(Op.getNode()) == 0)
2005             PrunedOps.push_back(Op);
2006         }
2007         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2008       } else {
2009         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2010       }
2011     }
2012     return Result;
2013   }
2014   return SDValue();
2015 }
2016 
2017 /// MERGE_VALUES can always be eliminated.
2018 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2019   WorklistRemover DeadNodes(*this);
2020   // Replacing results may cause a different MERGE_VALUES to suddenly
2021   // be CSE'd with N, and carry its uses with it. Iterate until no
2022   // uses remain, to ensure that the node can be safely deleted.
2023   // First add the users of this node to the work list so that they
2024   // can be tried again once they have new operands.
2025   AddUsersToWorklist(N);
2026   do {
2027     // Do as a single replacement to avoid rewalking use lists.
2028     SmallVector<SDValue, 8> Ops;
2029     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2030       Ops.push_back(N->getOperand(i));
2031     DAG.ReplaceAllUsesWith(N, Ops.data());
2032   } while (!N->use_empty());
2033   deleteAndRecombine(N);
2034   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2035 }
2036 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
2039 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2040   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2041   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2042 }
2043 
2044 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2045 /// and that N may be folded in the load / store addressing mode.
2046 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2047                                     const TargetLowering &TLI) {
2048   EVT VT;
2049   unsigned AS;
2050 
2051   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2052     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2053       return false;
2054     VT = LD->getMemoryVT();
2055     AS = LD->getAddressSpace();
2056   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2057     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2058       return false;
2059     VT = ST->getMemoryVT();
2060     AS = ST->getAddressSpace();
2061   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2062     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2063       return false;
2064     VT = LD->getMemoryVT();
2065     AS = LD->getAddressSpace();
2066   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2067     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2068       return false;
2069     VT = ST->getMemoryVT();
2070     AS = ST->getAddressSpace();
2071   } else
2072     return false;
2073 
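  // Build an addressing-mode description of N, which must be an ADD or SUB
  // of a base register with either an immediate or another register, and ask
  // the target whether that mode is legal for the memory type and address
  // space found above.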
2074   TargetLowering::AddrMode AM;
2075   if (N->getOpcode() == ISD::ADD) {
2076     AM.HasBaseReg = true;
2077     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2078     if (Offset)
2079       // [reg +/- imm]
2080       AM.BaseOffs = Offset->getSExtValue();
2081     else
2082       // [reg +/- reg]
2083       AM.Scale = 1;
2084   } else if (N->getOpcode() == ISD::SUB) {
2085     AM.HasBaseReg = true;
2086     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087     if (Offset)
2088       // [reg +/- imm]
2089       AM.BaseOffs = -Offset->getSExtValue();
2090     else
2091       // [reg +/- reg]
2092       AM.Scale = 1;
2093   } else
2094     return false;
2095 
2096   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2097                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2098 }
2099 
2100 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2101   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2102          "Unexpected binary operator");
2103 
2104   // Don't do this unless the old select is going away. We want to eliminate the
2105   // binary operator, not replace a binop with a select.
2106   // TODO: Handle ISD::SELECT_CC.
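  // The select may be either operand of the binop. Find the one-use SELECT,
  // remembering its operand index so the other operand (CBO) can be taken
  // from the opposite side.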
2107   unsigned SelOpNo = 0;
2108   SDValue Sel = BO->getOperand(0);
2109   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2110     SelOpNo = 1;
2111     Sel = BO->getOperand(1);
2112   }
2113 
2114   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2115     return SDValue();
2116 
2117   SDValue CT = Sel.getOperand(1);
2118   if (!isConstantOrConstantVector(CT, true) &&
2119       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2120     return SDValue();
2121 
2122   SDValue CF = Sel.getOperand(2);
2123   if (!isConstantOrConstantVector(CF, true) &&
2124       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2125     return SDValue();
2126 
  // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1, in which case we can
  // propagate non-constant operands into the select. I.e.:
2130   // and (select Cond, 0, -1), X --> select Cond, 0, X
2131   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2132   auto BinOpcode = BO->getOpcode();
2133   bool CanFoldNonConst =
2134       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2135       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2136       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2137 
2138   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2139   if (!CanFoldNonConst &&
2140       !isConstantOrConstantVector(CBO, true) &&
2141       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2142     return SDValue();
2143 
2144   EVT VT = BO->getValueType(0);
2145 
2146   // We have a select-of-constants followed by a binary operator with a
2147   // constant. Eliminate the binop by pulling the constant math into the select.
2148   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2149   SDLoc DL(Sel);
2150   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2151                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2152   if (!CanFoldNonConst && !NewCT.isUndef() &&
2153       !isConstantOrConstantVector(NewCT, true) &&
2154       !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
2155     return SDValue();
2156 
2157   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2158                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2159   if (!CanFoldNonConst && !NewCF.isUndef() &&
2160       !isConstantOrConstantVector(NewCF, true) &&
2161       !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
2162     return SDValue();
2163 
2164   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2165   SelectOp->setFlags(BO->getFlags());
2166   return SelectOp;
2167 }
2168 
2169 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2170   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2171          "Expecting add or sub");
2172 
2173   // Match a constant operand and a zext operand for the math instruction:
2174   // add Z, C
2175   // sub C, Z
2176   bool IsAdd = N->getOpcode() == ISD::ADD;
2177   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2178   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2179   auto *CN = dyn_cast<ConstantSDNode>(C);
2180   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2181     return SDValue();
2182 
2183   // Match the zext operand as a setcc of a boolean.
2184   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2185       Z.getOperand(0).getValueType() != MVT::i1)
2186     return SDValue();
2187 
2188   // Match the compare as: setcc (X & 1), 0, eq.
2189   SDValue SetCC = Z.getOperand(0);
2190   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2191   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2192       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2193       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2194     return SDValue();
2195 
2196   // We are adding/subtracting a constant and an inverted low bit. Turn that
2197   // into a subtract/add of the low bit with incremented/decremented constant:
2198   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2199   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2200   EVT VT = C.getValueType();
2201   SDLoc DL(N);
2202   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2203   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2204                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2205   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2206 }
2207 
/// Try to fold an add/sub with a constant operand, where the other operand is
/// a logically shifted 'not' of the sign bit, into a shift and an add with a
/// different constant.
2210 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2211   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2212          "Expecting add or sub");
2213 
2214   // We need a constant operand for the add/sub, and the other operand is a
2215   // logical shift right: add (srl), C or sub C, (srl).
2216   bool IsAdd = N->getOpcode() == ISD::ADD;
2217   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2218   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2219   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2220       ShiftOp.getOpcode() != ISD::SRL)
2221     return SDValue();
2222 
2223   // The shift must be of a 'not' value.
2224   SDValue Not = ShiftOp.getOperand(0);
2225   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2226     return SDValue();
2227 
2228   // The shift must be moving the sign bit to the least-significant-bit.
2229   EVT VT = ShiftOp.getValueType();
2230   SDValue ShAmt = ShiftOp.getOperand(1);
2231   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2232   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2233     return SDValue();
2234 
2235   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2236   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2237   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2238   SDLoc DL(N);
2239   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2240   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2241   if (SDValue NewC =
2242           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2243                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2244     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2245   return SDValue();
2246 }
2247 
2248 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2249 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2250 /// are no common bits set in the operands).
2251 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2252   SDValue N0 = N->getOperand(0);
2253   SDValue N1 = N->getOperand(1);
2254   EVT VT = N0.getValueType();
2255   SDLoc DL(N);
2256 
2257   // fold vector ops
2258   if (VT.isVector()) {
2259     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2260       return FoldedVOp;
2261 
2262     // fold (add x, 0) -> x, vector edition
2263     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2264       return N0;
2265     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2266       return N1;
2267   }
2268 
2269   // fold (add x, undef) -> undef
2270   if (N0.isUndef())
2271     return N0;
2272 
2273   if (N1.isUndef())
2274     return N1;
2275 
2276   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2277     // canonicalize constant to RHS
2278     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2279       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2280     // fold (add c1, c2) -> c1+c2
2281     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2282   }
2283 
2284   // fold (add x, 0) -> x
2285   if (isNullConstant(N1))
2286     return N0;
2287 
2288   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2289     // fold ((A-c1)+c2) -> (A+(c2-c1))
2290     if (N0.getOpcode() == ISD::SUB &&
2291         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2292       SDValue Sub =
2293           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2294       assert(Sub && "Constant folding failed");
2295       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2296     }
2297 
2298     // fold ((c1-A)+c2) -> (c1+c2)-A
2299     if (N0.getOpcode() == ISD::SUB &&
2300         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2301       SDValue Add =
2302           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2303       assert(Add && "Constant folding failed");
2304       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2305     }
2306 
2307     // add (sext i1 X), 1 -> zext (not i1 X)
2308     // We don't transform this pattern:
2309     //   add (zext i1 X), -1 -> sext (not i1 X)
2310     // because most (?) targets generate better code for the zext form.
2311     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2312         isOneOrOneSplat(N1)) {
2313       SDValue X = N0.getOperand(0);
2314       if ((!LegalOperations ||
2315            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2316             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2317           X.getScalarValueSizeInBits() == 1) {
2318         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2319         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2320       }
2321     }
2322 
2323     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2324     // equivalent to (add x, c0).
2325     if (N0.getOpcode() == ISD::OR &&
2326         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2327         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2328       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2329                                                     {N1, N0.getOperand(1)}))
2330         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2331     }
2332   }
2333 
2334   if (SDValue NewSel = foldBinOpIntoSelect(N))
2335     return NewSel;
2336 
2337   // reassociate add
2338   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2339     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2340       return RADD;
2341   }
2342   // fold ((0-A) + B) -> B-A
2343   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2344     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2345 
2346   // fold (A + (0-B)) -> A-B
2347   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2348     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2349 
2350   // fold (A+(B-A)) -> B
2351   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2352     return N1.getOperand(0);
2353 
2354   // fold ((B-A)+A) -> B
2355   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2356     return N0.getOperand(0);
2357 
2358   // fold ((A-B)+(C-A)) -> (C-B)
2359   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2360       N0.getOperand(0) == N1.getOperand(1))
2361     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2362                        N0.getOperand(1));
2363 
2364   // fold ((A-B)+(B-C)) -> (A-C)
2365   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2366       N0.getOperand(1) == N1.getOperand(0))
2367     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2368                        N1.getOperand(1));
2369 
2370   // fold (A+(B-(A+C))) to (B-C)
2371   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2372       N0 == N1.getOperand(1).getOperand(0))
2373     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2374                        N1.getOperand(1).getOperand(1));
2375 
2376   // fold (A+(B-(C+A))) to (B-C)
2377   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2378       N0 == N1.getOperand(1).getOperand(1))
2379     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2380                        N1.getOperand(1).getOperand(0));
2381 
2382   // fold (A+((B-A)+or-C)) to (B+or-C)
2383   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2384       N1.getOperand(0).getOpcode() == ISD::SUB &&
2385       N0 == N1.getOperand(0).getOperand(1))
2386     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2387                        N1.getOperand(1));
2388 
2389   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2390   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2391     SDValue N00 = N0.getOperand(0);
2392     SDValue N01 = N0.getOperand(1);
2393     SDValue N10 = N1.getOperand(0);
2394     SDValue N11 = N1.getOperand(1);
2395 
2396     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2397       return DAG.getNode(ISD::SUB, DL, VT,
2398                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2399                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2400   }
2401 
2402   // fold (add (umax X, C), -C) --> (usubsat X, C)
2403   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2404     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2405       return (!Max && !Op) ||
2406              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2407     };
2408     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2409                                   /*AllowUndefs*/ true))
2410       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2411                          N0.getOperand(1));
2412   }
2413 
2414   if (SimplifyDemandedBits(SDValue(N, 0)))
2415     return SDValue(N, 0);
2416 
2417   if (isOneOrOneSplat(N1)) {
2418     // fold (add (xor a, -1), 1) -> (sub 0, a)
2419     if (isBitwiseNot(N0))
2420       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2421                          N0.getOperand(0));
2422 
2423     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2424     if (N0.getOpcode() == ISD::ADD ||
2425         N0.getOpcode() == ISD::UADDO ||
2426         N0.getOpcode() == ISD::SADDO) {
2427       SDValue A, Xor;
2428 
2429       if (isBitwiseNot(N0.getOperand(0))) {
2430         A = N0.getOperand(1);
2431         Xor = N0.getOperand(0);
2432       } else if (isBitwiseNot(N0.getOperand(1))) {
2433         A = N0.getOperand(0);
2434         Xor = N0.getOperand(1);
2435       }
2436 
2437       if (Xor)
2438         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2439     }
2440 
2441     // Look for:
2442     //   add (add x, y), 1
2443     // And if the target does not like this form then turn into:
2444     //   sub y, (xor x, -1)
2445     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2446         N0.getOpcode() == ISD::ADD) {
2447       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2448                                 DAG.getAllOnesConstant(DL, VT));
2449       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2450     }
2451   }
2452 
2453   // (x - y) + -1  ->  add (xor y, -1), x
2454   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2455       isAllOnesOrAllOnesSplat(N1)) {
2456     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2457     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2458   }
2459 
2460   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2461     return Combined;
2462 
2463   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2464     return Combined;
2465 
2466   return SDValue();
2467 }
2468 
2469 SDValue DAGCombiner::visitADD(SDNode *N) {
2470   SDValue N0 = N->getOperand(0);
2471   SDValue N1 = N->getOperand(1);
2472   EVT VT = N0.getValueType();
2473   SDLoc DL(N);
2474 
2475   if (SDValue Combined = visitADDLike(N))
2476     return Combined;
2477 
2478   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2479     return V;
2480 
2481   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2482     return V;
2483 
2484   // fold (a+b) -> (a|b) iff a and b share no bits.
2485   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2486       DAG.haveNoCommonBitsSet(N0, N1))
2487     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2488 
2489   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2490   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2491     const APInt &C0 = N0->getConstantOperandAPInt(0);
2492     const APInt &C1 = N1->getConstantOperandAPInt(0);
2493     return DAG.getVScale(DL, VT, C0 + C1);
2494   }
2495 
2496   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2497   if ((N0.getOpcode() == ISD::ADD) &&
2498       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2499       (N1.getOpcode() == ISD::VSCALE)) {
2500     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2501     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2502     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2503     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2504   }
2505 
  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
2507   if (N0.getOpcode() == ISD::STEP_VECTOR &&
2508       N1.getOpcode() == ISD::STEP_VECTOR) {
2509     const APInt &C0 = N0->getConstantOperandAPInt(0);
2510     const APInt &C1 = N1->getConstantOperandAPInt(0);
2511     EVT SVT = N0.getOperand(0).getValueType();
2512     SDValue NewStep = DAG.getConstant(C0 + C1, DL, SVT);
2513     return DAG.getStepVector(DL, VT, NewStep);
2514   }
2515 
2516   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
2517   if ((N0.getOpcode() == ISD::ADD) &&
2518       (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) &&
2519       (N1.getOpcode() == ISD::STEP_VECTOR)) {
2520     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2521     const APInt &SV1 = N1->getConstantOperandAPInt(0);
2522     EVT SVT = N1.getOperand(0).getValueType();
2523     assert(N1.getOperand(0).getValueType() ==
2524                N0.getOperand(1)->getOperand(0).getValueType() &&
2525            "Different operand types of STEP_VECTOR.");
2526     SDValue NewStep = DAG.getConstant(SV0 + SV1, DL, SVT);
2527     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
2528     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
2529   }
2530 
2531   return SDValue();
2532 }
2533 
2534 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2535   unsigned Opcode = N->getOpcode();
2536   SDValue N0 = N->getOperand(0);
2537   SDValue N1 = N->getOperand(1);
2538   EVT VT = N0.getValueType();
2539   SDLoc DL(N);
2540 
2541   // fold vector ops
2542   if (VT.isVector()) {
2543     // TODO SimplifyVBinOp
2544 
2545     // fold (add_sat x, 0) -> x, vector edition
2546     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2547       return N0;
2548     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
2549       return N1;
2550   }
2551 
2552   // fold (add_sat x, undef) -> -1
2553   if (N0.isUndef() || N1.isUndef())
2554     return DAG.getAllOnesConstant(DL, VT);
2555 
2556   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2557     // canonicalize constant to RHS
2558     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2559       return DAG.getNode(Opcode, DL, VT, N1, N0);
2560     // fold (add_sat c1, c2) -> c3
2561     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2562   }
2563 
2564   // fold (add_sat x, 0) -> x
2565   if (isNullConstant(N1))
2566     return N0;
2567 
2568   // If it cannot overflow, transform into an add.
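  // computeOverflowKind reasons about unsigned addition, which is presumably
  // why only UADDSAT is handled here.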
2569   if (Opcode == ISD::UADDSAT)
2570     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2571       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2572 
2573   return SDValue();
2574 }
2575 
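/// Look through the TRUNCATE/ZERO_EXTEND and AND-with-1 wrappers that
/// legalization may have introduced, and return the underlying carry value
/// (result 1 of an ADDCARRY/SUBCARRY/UADDO/USUBO) if it can safely be used
/// as a boolean carry; otherwise return an empty SDValue.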
2576 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2577   bool Masked = false;
2578 
2579   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2580   while (true) {
2581     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2582       V = V.getOperand(0);
2583       continue;
2584     }
2585 
2586     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2587       Masked = true;
2588       V = V.getOperand(0);
2589       continue;
2590     }
2591 
2592     break;
2593   }
2594 
2595   // If this is not a carry, return.
2596   if (V.getResNo() != 1)
2597     return SDValue();
2598 
2599   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2600       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2601     return SDValue();
2602 
2603   EVT VT = V.getNode()->getValueType(0);
2604   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2605     return SDValue();
2606 
  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool is known to be
  // either 0 or 1 and not some other value.
2610   if (Masked ||
2611       TLI.getBooleanContents(V.getValueType()) ==
2612           TargetLoweringBase::ZeroOrOneBooleanContent)
2613     return V;
2614 
2615   return SDValue();
2616 }
2617 
2618 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2619 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2620 /// the opcode and bypass the mask operation.
2621 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2622                                  SelectionDAG &DAG, const SDLoc &DL) {
2623   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2624     return SDValue();
2625 
2626   EVT VT = N0.getValueType();
2627   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2628     return SDValue();
2629 
  // N1.getOperand(0) is known to be all sign bits here, i.e. 0 or -1, so the
  // masked value (and X, 1) is equal to -X and the mask can be removed by
  // inverting the operation:
  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2633 }
2634 
2635 /// Helper for doing combines based on N0 and N1 being added to each other.
                                             SDNode *LocReference) {
2637                                           SDNode *LocReference) {
2638   EVT VT = N0.getValueType();
2639   SDLoc DL(LocReference);
2640 
2641   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2642   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2643       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2644     return DAG.getNode(ISD::SUB, DL, VT, N0,
2645                        DAG.getNode(ISD::SHL, DL, VT,
2646                                    N1.getOperand(0).getOperand(1),
2647                                    N1.getOperand(1)));
2648 
2649   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2650     return V;
2651 
2652   // Look for:
2653   //   add (add x, 1), y
2654   // And if the target does not like this form then turn into:
2655   //   sub y, (xor x, -1)
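  // (In two's complement, (xor x, -1) == -x - 1, so sub y, (xor x, -1)
  // computes y + x + 1, i.e. the original add (add x, 1), y.)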
2656   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2657       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2658     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2659                               DAG.getAllOnesConstant(DL, VT));
2660     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2661   }
2662 
2663   // Hoist one-use subtraction by non-opaque constant:
2664   //   (x - C) + y  ->  (x + y) - C
2665   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2666   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2667       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2668     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2669     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2670   }
2671   // Hoist one-use subtraction from non-opaque constant:
2672   //   (C - x) + y  ->  (y - x) + C
2673   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2674       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2675     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2676     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2677   }
2678 
2679   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2680   // rather than 'add 0/-1' (the zext should get folded).
2681   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2682   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2683       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2684       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2685     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2686     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2687   }
2688 
2689   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2690   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2691     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2692     if (TN->getVT() == MVT::i1) {
2693       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2694                                  DAG.getConstant(1, DL, VT));
2695       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2696     }
2697   }
2698 
2699   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2700   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2701       N1.getResNo() == 0)
2702     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2703                        N0, N1.getOperand(0), N1.getOperand(2));
2704 
2705   // (add X, Carry) -> (addcarry X, 0, Carry)
2706   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2707     if (SDValue Carry = getAsCarry(TLI, N1))
2708       return DAG.getNode(ISD::ADDCARRY, DL,
2709                          DAG.getVTList(VT, Carry.getValueType()), N0,
2710                          DAG.getConstant(0, DL, VT), Carry);
2711 
2712   return SDValue();
2713 }
2714 
2715 SDValue DAGCombiner::visitADDC(SDNode *N) {
2716   SDValue N0 = N->getOperand(0);
2717   SDValue N1 = N->getOperand(1);
2718   EVT VT = N0.getValueType();
2719   SDLoc DL(N);
2720 
2721   // If the flag result is dead, turn this into an ADD.
2722   if (!N->hasAnyUseOfValue(1))
2723     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2724                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2725 
2726   // canonicalize constant to RHS.
2727   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2728   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2729   if (N0C && !N1C)
2730     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2731 
2732   // fold (addc x, 0) -> x + no carry out
2733   if (isNullConstant(N1))
2734     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2735                                         DL, MVT::Glue));
2736 
2737   // If it cannot overflow, transform into an add.
2738   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2739     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2740                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2741 
2742   return SDValue();
2743 }
2744 
/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is
 * set, then the flip also occurs if computing the inverse is the same cost.
 * This function returns an empty SDValue if it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a
 * boolean no matter what, use DAG.getLogicalNOT.
 */
2752 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2753                                   const TargetLowering &TLI,
2754                                   bool Force) {
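  // For example, under ZeroOrOneBooleanContent a flip of (xor X, 1) is
  // simply X, so X is returned directly; and with Force set, a constant
  // bool can always be flipped via getLogicalNOT.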
2755   if (Force && isa<ConstantSDNode>(V))
2756     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2757 
2758   if (V.getOpcode() != ISD::XOR)
2759     return SDValue();
2760 
2761   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2762   if (!Const)
2763     return SDValue();
2764 
2765   EVT VT = V.getValueType();
2766 
2767   bool IsFlip = false;
2768   switch(TLI.getBooleanContents(VT)) {
2769     case TargetLowering::ZeroOrOneBooleanContent:
2770       IsFlip = Const->isOne();
2771       break;
2772     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2773       IsFlip = Const->isAllOnesValue();
2774       break;
2775     case TargetLowering::UndefinedBooleanContent:
2776       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2777       break;
2778   }
2779 
2780   if (IsFlip)
2781     return V.getOperand(0);
2782   if (Force)
2783     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2784   return SDValue();
2785 }
2786 
2787 SDValue DAGCombiner::visitADDO(SDNode *N) {
2788   SDValue N0 = N->getOperand(0);
2789   SDValue N1 = N->getOperand(1);
2790   EVT VT = N0.getValueType();
2791   bool IsSigned = (ISD::SADDO == N->getOpcode());
2792 
2793   EVT CarryVT = N->getValueType(1);
2794   SDLoc DL(N);
2795 
2796   // If the flag result is dead, turn this into an ADD.
2797   if (!N->hasAnyUseOfValue(1))
2798     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2799                      DAG.getUNDEF(CarryVT));
2800 
2801   // canonicalize constant to RHS.
2802   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2803       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2804     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2805 
2806   // fold (addo x, 0) -> x + no carry out
2807   if (isNullOrNullSplat(N1))
2808     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2809 
2810   if (!IsSigned) {
2811     // If it cannot overflow, transform into an add.
2812     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2813       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2814                        DAG.getConstant(0, DL, CarryVT));
2815 
    // fold (uaddo (xor a, -1), 1) -> (usubo 0, a) and flip carry.
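    // (~a + 1 is the two's complement negation 0 - a; the uaddo carries out
    // exactly when a == 0, while the usubo borrows exactly when a != 0, so
    // the carry must be inverted.)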
2817     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2818       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2819                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2820       return CombineTo(
2821           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2822     }
2823 
2824     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2825       return Combined;
2826 
2827     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2828       return Combined;
2829   }
2830 
2831   return SDValue();
2832 }
2833 
2834 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2835   EVT VT = N0.getValueType();
2836   if (VT.isVector())
2837     return SDValue();
2838 
2839   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2840   // If Y + 1 cannot overflow.
2841   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2842     SDValue Y = N1.getOperand(0);
2843     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2844     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2845       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2846                          N1.getOperand(2));
2847   }
2848 
2849   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2850   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2851     if (SDValue Carry = getAsCarry(TLI, N1))
2852       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2853                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2854 
2855   return SDValue();
2856 }
2857 
2858 SDValue DAGCombiner::visitADDE(SDNode *N) {
2859   SDValue N0 = N->getOperand(0);
2860   SDValue N1 = N->getOperand(1);
2861   SDValue CarryIn = N->getOperand(2);
2862 
2863   // canonicalize constant to RHS
2864   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2865   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2866   if (N0C && !N1C)
2867     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2868                        N1, N0, CarryIn);
2869 
2870   // fold (adde x, y, false) -> (addc x, y)
2871   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2872     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2873 
2874   return SDValue();
2875 }
2876 
2877 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2878   SDValue N0 = N->getOperand(0);
2879   SDValue N1 = N->getOperand(1);
2880   SDValue CarryIn = N->getOperand(2);
2881   SDLoc DL(N);
2882 
2883   // canonicalize constant to RHS
2884   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2885   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2886   if (N0C && !N1C)
2887     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2888 
2889   // fold (addcarry x, y, false) -> (uaddo x, y)
2890   if (isNullConstant(CarryIn)) {
2891     if (!LegalOperations ||
2892         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2893       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2894   }
2895 
2896   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
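  // (0 + 0 + CarryIn is at most 1, so the sum is just the carry bit itself,
  // normalized to 0/1 by the AND, and a carry out can never be produced.)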
2897   if (isNullConstant(N0) && isNullConstant(N1)) {
2898     EVT VT = N0.getValueType();
2899     EVT CarryVT = CarryIn.getValueType();
2900     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2901     AddToWorklist(CarryExt.getNode());
2902     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2903                                     DAG.getConstant(1, DL, VT)),
2904                      DAG.getConstant(0, DL, CarryVT));
2905   }
2906 
2907   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2908     return Combined;
2909 
2910   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2911     return Combined;
2912 
2913   return SDValue();
2914 }
2915 
2916 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2917   SDValue N0 = N->getOperand(0);
2918   SDValue N1 = N->getOperand(1);
2919   SDValue CarryIn = N->getOperand(2);
2920   SDLoc DL(N);
2921 
2922   // canonicalize constant to RHS
2923   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2924   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2925   if (N0C && !N1C)
2926     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2927 
2928   // fold (saddo_carry x, y, false) -> (saddo x, y)
2929   if (isNullConstant(CarryIn)) {
2930     if (!LegalOperations ||
2931         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2932       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2933   }
2934 
2935   return SDValue();
2936 }
2937 
/**
 * If we are facing some sort of diamond carry propagation pattern try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations
 * required, but because the carry is now linearized, other transforms can
 * kick in and optimize the DAG.
2945  *
2946  * Patterns typically look something like
2947  *            (uaddo A, B)
2948  *             /       \
2949  *          Carry      Sum
2950  *            |          \
2951  *            | (addcarry *, 0, Z)
2952  *            |       /
2953  *             \   Carry
2954  *              |   /
2955  * (addcarry X, *, *)
2956  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2958  * produce a combine with a single path for carry propagation.
2959  */
2960 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2961                                       SDValue X, SDValue Carry0, SDValue Carry1,
2962                                       SDNode *N) {
2963   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2964     return SDValue();
2965   if (Carry1.getOpcode() != ISD::UADDO)
2966     return SDValue();
2967 
2968   SDValue Z;
2969 
2970   /**
2971    * First look for a suitable Z. It will present itself in the form of
2972    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2973    */
2974   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2975       isNullConstant(Carry0.getOperand(1))) {
2976     Z = Carry0.getOperand(2);
2977   } else if (Carry0.getOpcode() == ISD::UADDO &&
2978              isOneConstant(Carry0.getOperand(1))) {
2979     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2980     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2981   } else {
2982     // We couldn't find a suitable Z.
2983     return SDValue();
  }

  auto cancelDiamond = [&](SDValue A, SDValue B) {
2988     SDLoc DL(N);
2989     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2990     Combiner.AddToWorklist(NewY.getNode());
2991     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2992                        DAG.getConstant(0, DL, X.getValueType()),
2993                        NewY.getValue(1));
2994   };
2995 
2996   /**
2997    *      (uaddo A, B)
2998    *           |
2999    *          Sum
3000    *           |
3001    * (addcarry *, 0, Z)
3002    */
3003   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3004     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3005   }
3006 
3007   /**
3008    * (addcarry A, 0, Z)
3009    *         |
3010    *        Sum
3011    *         |
3012    *  (uaddo *, B)
3013    */
3014   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3015     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3016   }
3017 
3018   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3019     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3020   }
3021 
3022   return SDValue();
3023 }
3024 
3025 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3026 // match patterns like:
3027 //
3028 //          (uaddo A, B)            CarryIn
3029 //            |  \                     |
3030 //            |   \                    |
3031 //    PartialSum   PartialCarryOutX   /
3032 //            |        |             /
3033 //            |    ____|____________/
3034 //            |   /    |
3035 //     (uaddo *, *)    \________
3036 //       |  \                   \
3037 //       |   \                   |
3038 //       |    PartialCarryOutY   |
3039 //       |        \              |
3040 //       |         \            /
3041 //   AddCarrySum    |    ______/
3042 //                  |   /
3043 //   CarryOut = (or *, *)
3044 //
3045 // And generate ADDCARRY (or SUBCARRY) with two result values:
3046 //
3047 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3048 //
3049 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3050 // a single path for carry/borrow out propagation:
3051 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3052                                    const TargetLowering &TLI, SDValue Carry0,
3053                                    SDValue Carry1, SDNode *N) {
3054   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3055     return SDValue();
3056   unsigned Opcode = Carry0.getOpcode();
3057   if (Opcode != Carry1.getOpcode())
3058     return SDValue();
3059   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3060     return SDValue();
3061 
3062   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3063   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3064   // the above ASCII art.)
3065   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3066       Carry1.getOperand(1) != Carry0.getValue(0))
3067     std::swap(Carry0, Carry1);
3068   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3069       Carry1.getOperand(1) != Carry0.getValue(0))
3070     return SDValue();
3071 
  // The carry-in value must be on the right-hand side for subtraction.
3073   unsigned CarryInOperandNum =
3074       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3075   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3076     return SDValue();
3077   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3078 
3079   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3080   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3081     return SDValue();
3082 
3083   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3084   // TODO: make getAsCarry() aware of how partial carries are merged.
3085   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3086     return SDValue();
3087   CarryIn = CarryIn.getOperand(0);
3088   if (CarryIn.getValueType() != MVT::i1)
3089     return SDValue();
3090 
3091   SDLoc DL(N);
3092   SDValue Merged =
3093       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3094                   Carry0.getOperand(1), CarryIn);
3095 
  // Note that we have already proven that the result of the UADDO/USUBO of
  // A and B feeds into the UADDO/USUBO that consumes the carry/borrow in;
  // therefore, if the first UADDO/USUBO overflows, the second one cannot.
  // For example, consider 8-bit numbers where 0xFF is the maximum value.
3101   //
3102   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3103   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3104   //
  // This is important because it means that OR and XOR can be used to merge
  // the carry flags, and that AND can be folded to a constant zero.
3107   //
3108   // TODO: match other operations that can merge flags (ADD, etc)
3109   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3110   if (N->getOpcode() == ISD::AND)
3111     return DAG.getConstant(0, DL, MVT::i1);
3112   return Merged.getValue(1);
3113 }
3114 
3115 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3116                                        SDNode *N) {
3117   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
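  // (b + ~a + c == b + (2^n - 1 - a) + c == b - a - (1 - c) mod 2^n, which
  // is exactly (subcarry b, a, !c); the resulting carry-out and borrow-out
  // are complements of each other, hence the flip.)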
3118   if (isBitwiseNot(N0))
3119     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3120       SDLoc DL(N);
3121       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3122                                 N0.getOperand(0), NotC);
3123       return CombineTo(
3124           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3125     }
3126 
3127   // Iff the flag result is dead:
3128   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3129   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3130   // or the dependency between the instructions.
3131   if ((N0.getOpcode() == ISD::ADD ||
3132        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3133         N0.getValue(1) != CarryIn)) &&
3134       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3135     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3136                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3137 
  /**
   * When one of the addcarry arguments is itself a carry, we may be facing
   * a diamond carry propagation, in which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
3143   if (auto Y = getAsCarry(TLI, N1)) {
3144     // Because both are carries, Y and Z can be swapped.
3145     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3146       return R;
3147     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3148       return R;
3149   }
3150 
3151   return SDValue();
3152 }
3153 
3154 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3155 // clamp/truncation if necessary.
3156 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3157                                    SDValue RHS, SelectionDAG &DAG,
3158                                    const SDLoc &DL) {
3159   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3160          "Illegal truncation");
3161 
3162   if (DstVT == SrcVT)
3163     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3164 
3165   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3166   // clamping RHS.
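  // e.g. with SrcVT = i16, DstVT = i8 and the upper bits of LHS known zero:
  //   trunc(usubsat i16 LHS, RHS)
  //     == usubsat i8 (trunc LHS), (trunc (umin RHS, 0xFF))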
3167   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3168                                           DstVT.getScalarSizeInBits());
3169   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3170     return SDValue();
3171 
3172   SDValue SatLimit =
3173       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3174                                            DstVT.getScalarSizeInBits()),
3175                       DL, SrcVT);
3176   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3177   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3178   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3179   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3180 }
3181 
3182 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3183 // usubsat(a,b), optionally as a truncated type.
3184 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3185   if (N->getOpcode() != ISD::SUB ||
3186       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3187     return SDValue();
3188 
3189   EVT SubVT = N->getValueType(0);
3190   SDValue Op0 = N->getOperand(0);
3191   SDValue Op1 = N->getOperand(1);
3192 
  // Try to find umax(a,b) - b or a - umin(a,b) patterns that
  // may be converted to usubsat(a,b).
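  // (umax(a,b) - b yields a - b when a > b and 0 otherwise, which is
  // exactly usubsat(a,b); the same holds for a - umin(a,b).)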
3195   if (Op0.getOpcode() == ISD::UMAX) {
3196     SDValue MaxLHS = Op0.getOperand(0);
3197     SDValue MaxRHS = Op0.getOperand(1);
3198     if (MaxLHS == Op1)
3199       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3200     if (MaxRHS == Op1)
3201       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3202   }
3203 
3204   if (Op1.getOpcode() == ISD::UMIN) {
3205     SDValue MinLHS = Op1.getOperand(0);
3206     SDValue MinRHS = Op1.getOperand(1);
3207     if (MinLHS == Op0)
3208       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3209     if (MinRHS == Op0)
3210       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3211   }
3212 
3213   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3214   if (Op1.getOpcode() == ISD::TRUNCATE &&
3215       Op1.getOperand(0).getOpcode() == ISD::UMIN) {
3216     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3217     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3218     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3219       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3220                                  DAG, SDLoc(N));
3221     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3222       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3223                                  DAG, SDLoc(N));
3224   }
3225 
3226   return SDValue();
3227 }
3228 
// It may not be valid to emit a fold to zero for vector initializers,
// so check whether we can before folding.
3231 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3232                              SelectionDAG &DAG, bool LegalOperations) {
3233   if (!VT.isVector())
3234     return DAG.getConstant(0, DL, VT);
3235   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3236     return DAG.getConstant(0, DL, VT);
3237   return SDValue();
3238 }
3239 
3240 SDValue DAGCombiner::visitSUB(SDNode *N) {
3241   SDValue N0 = N->getOperand(0);
3242   SDValue N1 = N->getOperand(1);
3243   EVT VT = N0.getValueType();
3244   SDLoc DL(N);
3245 
3246   // fold vector ops
3247   if (VT.isVector()) {
3248     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3249       return FoldedVOp;
3250 
3251     // fold (sub x, 0) -> x, vector edition
3252     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3253       return N0;
3254   }
3255 
3256   // fold (sub x, x) -> 0
3257   // FIXME: Refactor this and xor and other similar operations together.
3258   if (N0 == N1)
3259     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3260 
3261   // fold (sub c1, c2) -> c3
3262   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3263     return C;
3264 
3265   if (SDValue NewSel = foldBinOpIntoSelect(N))
3266     return NewSel;
3267 
3268   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3269 
3270   // fold (sub x, c) -> (add x, -c)
3271   if (N1C) {
3272     return DAG.getNode(ISD::ADD, DL, VT, N0,
3273                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3274   }
3275 
3276   if (isNullOrNullSplat(N0)) {
3277     unsigned BitWidth = VT.getScalarSizeInBits();
3278     // Right-shifting everything out but the sign bit followed by negation is
3279     // the same as flipping arithmetic/logical shift type without the negation:
3280     // -(X >>u 31) -> (X >>s 31)
3281     // -(X >>s 31) -> (X >>u 31)
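    // (The logical shift leaves only 0 or 1 and the arithmetic shift leaves
    // only 0 or -1; negation maps each of those ranges onto the other.)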
3282     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3283       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3284       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3285         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3286         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3287           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3288       }
3289     }
3290 
3291     // 0 - X --> 0 if the sub is NUW.
3292     if (N->getFlags().hasNoUnsignedWrap())
3293       return N0;
3294 
3295     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3296       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3297       // N1 must be 0 because negating the minimum signed value is undefined.
3298       if (N->getFlags().hasNoSignedWrap())
3299         return N0;
3300 
3301       // 0 - X --> X if X is 0 or the minimum signed value.
3302       return N1;
3303     }
3304 
3305     // Convert 0 - abs(x).
3306     SDValue Result;
3307     if (N1->getOpcode() == ISD::ABS &&
3308         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3309         TLI.expandABS(N1.getNode(), Result, DAG, true))
3310       return Result;
3311   }
3312 
3313   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3314   if (isAllOnesOrAllOnesSplat(N0))
3315     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3316 
3317   // fold (A - (0-B)) -> A+B
3318   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3319     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3320 
3321   // fold A-(A-B) -> B
3322   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3323     return N1.getOperand(1);
3324 
3325   // fold (A+B)-A -> B
3326   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3327     return N0.getOperand(1);
3328 
3329   // fold (A+B)-B -> A
3330   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3331     return N0.getOperand(0);
3332 
3333   // fold (A+C1)-C2 -> A+(C1-C2)
3334   if (N0.getOpcode() == ISD::ADD &&
3335       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3336       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3337     SDValue NewC =
3338         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3339     assert(NewC && "Constant folding failed");
3340     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3341   }
3342 
3343   // fold C2-(A+C1) -> (C2-C1)-A
3344   if (N1.getOpcode() == ISD::ADD) {
3345     SDValue N11 = N1.getOperand(1);
3346     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3347         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3348       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3349       assert(NewC && "Constant folding failed");
3350       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3351     }
3352   }
3353 
3354   // fold (A-C1)-C2 -> A-(C1+C2)
3355   if (N0.getOpcode() == ISD::SUB &&
3356       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3357       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3358     SDValue NewC =
3359         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3360     assert(NewC && "Constant folding failed");
3361     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3362   }
3363 
3364   // fold (c1-A)-c2 -> (c1-c2)-A
3365   if (N0.getOpcode() == ISD::SUB &&
3366       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3367       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3368     SDValue NewC =
3369         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3370     assert(NewC && "Constant folding failed");
3371     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3372   }
3373 
3374   // fold ((A+(B+or-C))-B) -> A+or-C
3375   if (N0.getOpcode() == ISD::ADD &&
3376       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3377        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3378       N0.getOperand(1).getOperand(0) == N1)
3379     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3380                        N0.getOperand(1).getOperand(1));
3381 
3382   // fold ((A+(C+B))-B) -> A+C
3383   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3384       N0.getOperand(1).getOperand(1) == N1)
3385     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3386                        N0.getOperand(1).getOperand(0));
3387 
3388   // fold ((A-(B-C))-C) -> A-B
3389   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3390       N0.getOperand(1).getOperand(1) == N1)
3391     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3392                        N0.getOperand(1).getOperand(0));
3393 
3394   // fold (A-(B-C)) -> A+(C-B)
3395   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3396     return DAG.getNode(ISD::ADD, DL, VT, N0,
3397                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3398                                    N1.getOperand(0)));
3399 
3400   // A - (A & B)  ->  A & (~B)
3401   if (N1.getOpcode() == ISD::AND) {
3402     SDValue A = N1.getOperand(0);
3403     SDValue B = N1.getOperand(1);
3404     if (A != N0)
3405       std::swap(A, B);
3406     if (A == N0 &&
3407         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3408       SDValue InvB =
3409           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3410       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3411     }
3412   }
3413 
3414   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3415   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3416     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3417         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3418       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3419                                 N1.getOperand(0).getOperand(1),
3420                                 N1.getOperand(1));
3421       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3422     }
3423     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3424         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3425       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3426                                 N1.getOperand(0),
3427                                 N1.getOperand(1).getOperand(1));
3428       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3429     }
3430   }
3431 
3432   // If either operand of a sub is undef, the result is undef
3433   if (N0.isUndef())
3434     return N0;
3435   if (N1.isUndef())
3436     return N1;
3437 
3438   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3439     return V;
3440 
3441   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3442     return V;
3443 
3444   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3445     return V;
3446 
3447   if (SDValue V = foldSubToUSubSat(VT, N))
3448     return V;
3449 
3450   // (x - y) - 1  ->  add (xor y, -1), x
3451   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3452     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3453                               DAG.getAllOnesConstant(DL, VT));
3454     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3455   }
3456 
3457   // Look for:
3458   //   sub y, (xor x, -1)
3459   // And if the target does not like this form then turn into:
3460   //   add (add x, y), 1
3461   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3462     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3463     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3464   }
3465 
3466   // Hoist one-use addition by non-opaque constant:
3467   //   (x + C) - y  ->  (x - y) + C
3468   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3469       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3470     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3471     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3472   }
3473   // y - (x + C)  ->  (y - x) - C
3474   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3475       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3476     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3477     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3478   }
3479   // (x - C) - y  ->  (x - y) - C
3480   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3481   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3482       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3483     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3484     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3485   }
3486   // (C - x) - y  ->  C - (x + y)
3487   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3488       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3489     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3490     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3491   }
3492 
3493   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3494   // rather than 'sub 0/1' (the sext should get folded).
3495   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3496   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3497       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3498       TLI.getBooleanContents(VT) ==
3499           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3500     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3501     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3502   }
3503 
3504   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
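  // (Y is 0 when X is non-negative and all-ones when X is negative, so
  // (X ^ Y) - Y is the classic branchless absolute-value idiom.)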
3505   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3506     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3507       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3508       SDValue S0 = N1.getOperand(0);
3509       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3510         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3511           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3512             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3513     }
3514   }
3515 
3516   // If the relocation model supports it, consider symbol offsets.
3517   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3518     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3519       // fold (sub Sym, c) -> Sym-c
3520       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3521         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3522                                     GA->getOffset() -
3523                                         (uint64_t)N1C->getSExtValue());
3524       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3525       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3526         if (GA->getGlobal() == GB->getGlobal())
3527           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3528                                  DL, VT);
3529     }
3530 
3531   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3532   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3533     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3534     if (TN->getVT() == MVT::i1) {
3535       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3536                                  DAG.getConstant(1, DL, VT));
3537       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3538     }
3539   }
3540 
3541   // canonicalize (sub X, (vscale * C)) to (add X,  (vscale * -C))
3542   if (N1.getOpcode() == ISD::VSCALE) {
3543     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3544     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3545   }
3546 
3547   // Prefer an add for more folding potential and possibly better codegen:
3548   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
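  // (The lshr produces 0 or 1 and the ashr produces 0 or -1; subtracting
  // the former from N0 equals adding the latter to N0.)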
3549   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3550     SDValue ShAmt = N1.getOperand(1);
3551     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3552     if (ShAmtC &&
3553         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3554       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3555       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3556     }
3557   }
3558 
3559   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3560     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3561     if (SDValue Carry = getAsCarry(TLI, N0)) {
3562       SDValue X = N1;
3563       SDValue Zero = DAG.getConstant(0, DL, VT);
3564       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3565       return DAG.getNode(ISD::ADDCARRY, DL,
3566                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3567                          Carry);
3568     }
3569   }
3570 
3571   return SDValue();
3572 }
3573 
3574 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3575   SDValue N0 = N->getOperand(0);
3576   SDValue N1 = N->getOperand(1);
3577   EVT VT = N0.getValueType();
3578   SDLoc DL(N);
3579 
3580   // fold vector ops
3581   if (VT.isVector()) {
3582     // TODO SimplifyVBinOp
3583 
3584     // fold (sub_sat x, 0) -> x, vector edition
3585     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3586       return N0;
3587   }
3588 
3589   // fold (sub_sat x, undef) -> 0
3590   if (N0.isUndef() || N1.isUndef())
3591     return DAG.getConstant(0, DL, VT);
3592 
3593   // fold (sub_sat x, x) -> 0
3594   if (N0 == N1)
3595     return DAG.getConstant(0, DL, VT);
3596 
3597   // fold (sub_sat c1, c2) -> c3
3598   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3599     return C;
3600 
3601   // fold (sub_sat x, 0) -> x
3602   if (isNullConstant(N1))
3603     return N0;
3604 
3605   return SDValue();
3606 }
3607 
3608 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3609   SDValue N0 = N->getOperand(0);
3610   SDValue N1 = N->getOperand(1);
3611   EVT VT = N0.getValueType();
3612   SDLoc DL(N);
3613 
  // If the flag result is dead, turn this into a SUB.
3615   if (!N->hasAnyUseOfValue(1))
3616     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3617                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3618 
3619   // fold (subc x, x) -> 0 + no borrow
3620   if (N0 == N1)
3621     return CombineTo(N, DAG.getConstant(0, DL, VT),
3622                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3623 
3624   // fold (subc x, 0) -> x + no borrow
3625   if (isNullConstant(N1))
3626     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3627 
3628   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3629   if (isAllOnesConstant(N0))
3630     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3631                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3632 
3633   return SDValue();
3634 }
3635 
3636 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3637   SDValue N0 = N->getOperand(0);
3638   SDValue N1 = N->getOperand(1);
3639   EVT VT = N0.getValueType();
3640   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3641 
3642   EVT CarryVT = N->getValueType(1);
3643   SDLoc DL(N);
3644 
  // If the flag result is dead, turn this into a SUB.
3646   if (!N->hasAnyUseOfValue(1))
3647     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3648                      DAG.getUNDEF(CarryVT));
3649 
3650   // fold (subo x, x) -> 0 + no borrow
3651   if (N0 == N1)
3652     return CombineTo(N, DAG.getConstant(0, DL, VT),
3653                      DAG.getConstant(0, DL, CarryVT));
3654 
3655   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3656 
  // fold (subo x, c) -> (addo x, -c)
3658   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3659     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3660                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3661   }
3662 
3663   // fold (subo x, 0) -> x + no borrow
3664   if (isNullOrNullSplat(N1))
3665     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3666 
3667   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3668   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3669     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3670                      DAG.getConstant(0, DL, CarryVT));
3671 
3672   return SDValue();
3673 }
3674 
3675 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3676   SDValue N0 = N->getOperand(0);
3677   SDValue N1 = N->getOperand(1);
3678   SDValue CarryIn = N->getOperand(2);
3679 
3680   // fold (sube x, y, false) -> (subc x, y)
3681   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3682     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3683 
3684   return SDValue();
3685 }
3686 
3687 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3688   SDValue N0 = N->getOperand(0);
3689   SDValue N1 = N->getOperand(1);
3690   SDValue CarryIn = N->getOperand(2);
3691 
3692   // fold (subcarry x, y, false) -> (usubo x, y)
3693   if (isNullConstant(CarryIn)) {
3694     if (!LegalOperations ||
3695         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3696       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3697   }
3698 
3699   return SDValue();
3700 }
3701 
3702 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3703   SDValue N0 = N->getOperand(0);
3704   SDValue N1 = N->getOperand(1);
3705   SDValue CarryIn = N->getOperand(2);
3706 
3707   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3708   if (isNullConstant(CarryIn)) {
3709     if (!LegalOperations ||
3710         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3711       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3712   }
3713 
3714   return SDValue();
3715 }
3716 
3717 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3718 // UMULFIXSAT here.
3719 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3720   SDValue N0 = N->getOperand(0);
3721   SDValue N1 = N->getOperand(1);
3722   SDValue Scale = N->getOperand(2);
3723   EVT VT = N0.getValueType();
3724 
3725   // fold (mulfix x, undef, scale) -> 0
3726   if (N0.isUndef() || N1.isUndef())
3727     return DAG.getConstant(0, SDLoc(N), VT);
3728 
3729   // Canonicalize constant to RHS (vector doesn't have to splat)
3730   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3731      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3732     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3733 
3734   // fold (mulfix x, 0, scale) -> 0
3735   if (isNullConstant(N1))
3736     return DAG.getConstant(0, SDLoc(N), VT);
3737 
3738   return SDValue();
3739 }
3740 
3741 SDValue DAGCombiner::visitMUL(SDNode *N) {
3742   SDValue N0 = N->getOperand(0);
3743   SDValue N1 = N->getOperand(1);
3744   EVT VT = N0.getValueType();
3745 
3746   // fold (mul x, undef) -> 0
3747   if (N0.isUndef() || N1.isUndef())
3748     return DAG.getConstant(0, SDLoc(N), VT);
3749 
3750   bool N1IsConst = false;
3751   bool N1IsOpaqueConst = false;
3752   APInt ConstValue1;
3753 
3754   // fold vector ops
3755   if (VT.isVector()) {
3756     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3757       return FoldedVOp;
3758 
3759     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3760     assert((!N1IsConst ||
3761             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3762            "Splat APInt should be element width");
3763   } else {
3764     N1IsConst = isa<ConstantSDNode>(N1);
3765     if (N1IsConst) {
3766       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3767       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3768     }
3769   }
3770 
3771   // fold (mul c1, c2) -> c1*c2
3772   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3773     return C;
3774 
3775   // canonicalize constant to RHS (vector doesn't have to splat)
3776   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3777      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3778     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3779 
3780   // fold (mul x, 0) -> 0
3781   if (N1IsConst && ConstValue1.isNullValue())
3782     return N1;
3783 
3784   // fold (mul x, 1) -> x
3785   if (N1IsConst && ConstValue1.isOneValue())
3786     return N0;
3787 
3788   if (SDValue NewSel = foldBinOpIntoSelect(N))
3789     return NewSel;
3790 
3791   // fold (mul x, -1) -> 0-x
3792   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3793     SDLoc DL(N);
3794     return DAG.getNode(ISD::SUB, DL, VT,
3795                        DAG.getConstant(0, DL, VT), N0);
3796   }
3797 
3798   // fold (mul x, (1 << c)) -> x << c
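  // e.g. (mul x, 8) -> (shl x, 3).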
3799   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3800       DAG.isKnownToBeAPowerOfTwo(N1) &&
3801       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3802     SDLoc DL(N);
3803     SDValue LogBase2 = BuildLogBase2(N1, DL);
3804     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3805     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3806     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3807   }
3808 
3809   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3810   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3811     unsigned Log2Val = (-ConstValue1).logBase2();
3812     SDLoc DL(N);
3813     // FIXME: If the input is something that is easily negated (e.g. a
3814     // single-use add), we should put the negate there.
3815     return DAG.getNode(ISD::SUB, DL, VT,
3816                        DAG.getConstant(0, DL, VT),
3817                        DAG.getNode(ISD::SHL, DL, VT, N0,
3818                             DAG.getConstant(Log2Val, DL,
3819                                       getShiftAmountTy(N0.getValueType()))));
3820   }
3821 
3822   // Try to transform:
3823   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3824   // mul x, (2^N + 1) --> add (shl x, N), x
3825   // mul x, (2^N - 1) --> sub (shl x, N), x
3826   // Examples: x * 33 --> (x << 5) + x
3827   //           x * 15 --> (x << 4) - x
3828   //           x * -33 --> -((x << 5) + x)
3829   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3830   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3831   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3832   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3833   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3834   //           x * 0xf800 --> (x << 16) - (x << 11)
3835   //           x * -0x8800 --> -((x << 15) + (x << 11))
3836   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3837   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3838     // TODO: We could handle more general decomposition of any constant by
3839     //       having the target set a limit on number of ops and making a
3840     //       callback to determine that sequence (similar to sqrt expansion).
3841     unsigned MathOp = ISD::DELETED_NODE;
3842     APInt MulC = ConstValue1.abs();
3843     // The constant `2` should be treated as (2^0 + 1).
3844     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3845     MulC.lshrInPlace(TZeros);
3846     if ((MulC - 1).isPowerOf2())
3847       MathOp = ISD::ADD;
3848     else if ((MulC + 1).isPowerOf2())
3849       MathOp = ISD::SUB;
3850 
3851     if (MathOp != ISD::DELETED_NODE) {
3852       unsigned ShAmt =
3853           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3854       ShAmt += TZeros;
3855       assert(ShAmt < VT.getScalarSizeInBits() &&
3856              "multiply-by-constant generated out of bounds shift");
3857       SDLoc DL(N);
3858       SDValue Shl =
3859           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3860       SDValue R =
3861           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3862                                DAG.getNode(ISD::SHL, DL, VT, N0,
3863                                            DAG.getConstant(TZeros, DL, VT)))
3864                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3865       if (ConstValue1.isNegative())
3866         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3867       return R;
3868     }
3869   }
3870 
3871   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3872   if (N0.getOpcode() == ISD::SHL &&
3873       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3874       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3875     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3876     if (isConstantOrConstantVector(C3))
3877       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3878   }
3879 
3880   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3881   // use.
3882   {
3883     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3884 
3885     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3886     if (N0.getOpcode() == ISD::SHL &&
3887         isConstantOrConstantVector(N0.getOperand(1)) &&
3888         N0.getNode()->hasOneUse()) {
3889       Sh = N0; Y = N1;
3890     } else if (N1.getOpcode() == ISD::SHL &&
3891                isConstantOrConstantVector(N1.getOperand(1)) &&
3892                N1.getNode()->hasOneUse()) {
3893       Sh = N1; Y = N0;
3894     }
3895 
3896     if (Sh.getNode()) {
3897       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3898       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3899     }
3900   }
3901 
3902   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3903   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3904       N0.getOpcode() == ISD::ADD &&
3905       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3906       isMulAddWithConstProfitable(N, N0, N1))
3907       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3908                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3909                                      N0.getOperand(0), N1),
3910                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3911                                      N0.getOperand(1), N1));
3912 
3913   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3914   if (N0.getOpcode() == ISD::VSCALE)
3915     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3916       const APInt &C0 = N0.getConstantOperandAPInt(0);
3917       const APInt &C1 = NC1->getAPIntValue();
3918       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3919     }
3920 
3921   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
3922   APInt MulVal;
3923   if (N0.getOpcode() == ISD::STEP_VECTOR)
3924     if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
3925       const APInt &C0 = N0.getConstantOperandAPInt(0);
3926       EVT SVT = N0.getOperand(0).getValueType();
3927       SDValue NewStep = DAG.getConstant(
3928           C0 * MulVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT);
3929       return DAG.getStepVector(SDLoc(N), VT, NewStep);
3930     }
3931 
  // Fold, per element, (mul x, 0/undef) -> 0 and (mul x, 1) -> x
3934   // -> and(x, mask)
3935   // We can replace vectors with '0' and '1' factors with a clearing mask.
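  // e.g. (mul x, <1,0,1,1>) -> (and x, <-1,0,-1,-1>).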
3936   if (VT.isFixedLengthVector()) {
3937     unsigned NumElts = VT.getVectorNumElements();
3938     SmallBitVector ClearMask;
3939     ClearMask.reserve(NumElts);
3940     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3941       if (!V || V->isNullValue()) {
3942         ClearMask.push_back(true);
3943         return true;
3944       }
3945       ClearMask.push_back(false);
3946       return V->isOne();
3947     };
3948     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3949         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3950       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3951       SDLoc DL(N);
3952       EVT LegalSVT = N1.getOperand(0).getValueType();
3953       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3954       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3955       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3956       for (unsigned I = 0; I != NumElts; ++I)
3957         if (ClearMask[I])
3958           Mask[I] = Zero;
3959       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
3960     }
3961   }
3962 
3963   // reassociate mul
3964   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3965     return RMUL;
3966 
3967   return SDValue();
3968 }
3969 
/// Return true if a divmod libcall is available.
3971 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3972                                      const TargetLowering &TLI) {
3973   RTLIB::Libcall LC;
3974   EVT NodeType = Node->getValueType(0);
3975   if (!NodeType.isSimple())
3976     return false;
3977   switch (NodeType.getSimpleVT().SimpleTy) {
3978   default: return false; // No libcall for vector types.
3979   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3980   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3981   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3982   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3983   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3984   }
3985 
3986   return TLI.getLibcallName(LC) != nullptr;
3987 }
3988 
3989 /// Issue divrem if both quotient and remainder are needed.
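/// For example, if the DAG contains both (sdiv X, Y) and (srem X, Y), a
/// single (sdivrem X, Y) node can supply both results.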
3990 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3991   if (Node->use_empty())
3992     return SDValue(); // This is a dead node, leave it alone.
3993 
3994   unsigned Opcode = Node->getOpcode();
3995   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3996   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3997 
3998   // DivMod libcalls can still handle non-legal types when lowering to libcalls.
3999   EVT VT = Node->getValueType(0);
4000   if (VT.isVector() || !VT.isInteger())
4001     return SDValue();
4002 
4003   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4004     return SDValue();
4005 
4006   // If DIVREM is going to get expanded into a libcall,
4007   // but there is no libcall available, then don't combine.
4008   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4009       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4010     return SDValue();
4011 
4012   // If div is legal, it's better to do the normal expansion
4013   unsigned OtherOpcode = 0;
4014   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4015     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4016     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4017       return SDValue();
4018   } else {
4019     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4020     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4021       return SDValue();
4022   }
4023 
4024   SDValue Op0 = Node->getOperand(0);
4025   SDValue Op1 = Node->getOperand(1);
4026   SDValue combined;
4027   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
4028          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
4029     SDNode *User = *UI;
4030     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4031         User->use_empty())
4032       continue;
4033     // Convert the other matching node(s), too;
4034     // otherwise, the DIVREM may get target-legalized into something
4035     // target-specific that we won't be able to recognize.
4036     unsigned UserOpc = User->getOpcode();
4037     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4038         User->getOperand(0) == Op0 &&
4039         User->getOperand(1) == Op1) {
4040       if (!combined) {
4041         if (UserOpc == OtherOpcode) {
4042           SDVTList VTs = DAG.getVTList(VT, VT);
4043           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4044         } else if (UserOpc == DivRemOpc) {
4045           combined = SDValue(User, 0);
4046         } else {
4047           assert(UserOpc == Opcode);
4048           continue;
4049         }
4050       }
4051       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4052         CombineTo(User, combined);
4053       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4054         CombineTo(User, combined.getValue(1));
4055     }
4056   }
4057   return combined;
4058 }
4059 
4060 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4061   SDValue N0 = N->getOperand(0);
4062   SDValue N1 = N->getOperand(1);
4063   EVT VT = N->getValueType(0);
4064   SDLoc DL(N);
4065 
4066   unsigned Opc = N->getOpcode();
4067   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4068   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4069 
4070   // X / undef -> undef
4071   // X % undef -> undef
4072   // X / 0 -> undef
4073   // X % 0 -> undef
4074   // NOTE: This includes vectors where any divisor element is zero/undef.
4075   if (DAG.isUndef(Opc, {N0, N1}))
4076     return DAG.getUNDEF(VT);
4077 
4078   // undef / X -> 0
4079   // undef % X -> 0
4080   if (N0.isUndef())
4081     return DAG.getConstant(0, DL, VT);
4082 
4083   // 0 / X -> 0
4084   // 0 % X -> 0
4085   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4086   if (N0C && N0C->isNullValue())
4087     return N0;
4088 
4089   // X / X -> 1
4090   // X % X -> 0
4091   if (N0 == N1)
4092     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4093 
4094   // X / 1 -> X
4095   // X % 1 -> 0
4096   // If this is a boolean op (single-bit element type), we can't have
4097   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4098   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4099   // it's a 1.
4100   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4101     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4102 
4103   return SDValue();
4104 }
4105 
4106 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4107   SDValue N0 = N->getOperand(0);
4108   SDValue N1 = N->getOperand(1);
4109   EVT VT = N->getValueType(0);
4110   EVT CCVT = getSetCCResultType(VT);
4111 
4112   // fold vector ops
4113   if (VT.isVector())
4114     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4115       return FoldedVOp;
4116 
4117   SDLoc DL(N);
4118 
4119   // fold (sdiv c1, c2) -> c1/c2
4120   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4121   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4122     return C;
4123 
4124   // fold (sdiv X, -1) -> 0-X
4125   if (N1C && N1C->isAllOnesValue())
4126     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4127 
4128   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
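  // MIN_SIGNED has the largest magnitude of any value of the type, so the
  // quotient is 1 only when X == MIN_SIGNED and truncates to 0 for every
  // other X.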
4129   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4130     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4131                          DAG.getConstant(1, DL, VT),
4132                          DAG.getConstant(0, DL, VT));
4133 
4134   if (SDValue V = simplifyDivRem(N, DAG))
4135     return V;
4136 
4137   if (SDValue NewSel = foldBinOpIntoSelect(N))
4138     return NewSel;
4139 
4140   // If we know the sign bits of both operands are zero, strength reduce to a
4141   // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4142   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4143     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4144 
4145   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4146     // If the corresponding remainder node exists, update its users with
4147     // (Dividend - (Quotient * Divisor)).
4148     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4149                                               { N0, N1 })) {
4150       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4151       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4152       AddToWorklist(Mul.getNode());
4153       AddToWorklist(Sub.getNode());
4154       CombineTo(RemNode, Sub);
4155     }
4156     return V;
4157   }
4158 
4159   // sdiv, srem -> sdivrem
4160   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4161   // true.  Otherwise, we break the simplification logic in visitREM().
4162   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4163   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4164     if (SDValue DivRem = useDivRem(N))
4165         return DivRem;
4166 
4167   return SDValue();
4168 }
4169 
4170 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4171   SDLoc DL(N);
4172   EVT VT = N->getValueType(0);
4173   EVT CCVT = getSetCCResultType(VT);
4174   unsigned BitWidth = VT.getScalarSizeInBits();
4175 
4176   // Helper for determining whether a value is a power-of-2 constant scalar (or
4177   // the negation of one), or a vector of such elements.
4178   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4179     if (C->isNullValue() || C->isOpaque())
4180       return false;
4181     if (C->getAPIntValue().isPowerOf2())
4182       return true;
4183     if ((-C->getAPIntValue()).isPowerOf2())
4184       return true;
4185     return false;
4186   };
4187 
4188   // fold (sdiv X, pow2) -> simple ops after legalize
4189   // FIXME: We check for the exact bit here because the generic lowering gives
4190   // better results in that case. The target-specific lowering should learn how
4191   // to handle exact sdivs efficiently.
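  // e.g. for i32 (sdiv X, 8), the generic expansion below computes
  //   Sign = X >> 31 (0 or -1), Srl = Sign >>u 29 (0 or 7),
  //   Res  = (X + Srl) >> 3,
  // which rounds the quotient toward zero for negative dividends.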
4192   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4193     // Target-specific implementation of sdiv x, pow2.
4194     if (SDValue Res = BuildSDIVPow2(N))
4195       return Res;
4196 
4197     // Create constants that are functions of the shift amount value.
4198     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4199     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4200     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4201     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4202     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4203     if (!isConstantOrConstantVector(Inexact))
4204       return SDValue();
4205 
4206     // Splat the sign bit into the register
4207     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4208                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4209     AddToWorklist(Sign.getNode());
4210 
4211     // Add (N0 < 0) ? abs2 - 1 : 0;
4212     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4213     AddToWorklist(Srl.getNode());
4214     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4215     AddToWorklist(Add.getNode());
4216     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4217     AddToWorklist(Sra.getNode());
4218 
4219     // Special case: (sdiv X, 1) -> X
4220     // Special case: (sdiv X, -1) -> 0-X
4221     SDValue One = DAG.getConstant(1, DL, VT);
4222     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4223     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4224     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4225     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4226     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4227 
4228     // If dividing by a positive value, we're done. Otherwise, the result must
4229     // be negated.
4230     SDValue Zero = DAG.getConstant(0, DL, VT);
4231     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4232 
4233     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4234     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4235     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4236     return Res;
4237   }
4238 
4239   // If integer divide is expensive and we satisfy the requirements, emit an
4240   // alternate sequence.  Targets may check function attributes for size/speed
4241   // trade-offs.
4242   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4243   if (isConstantOrConstantVector(N1) &&
4244       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4245     if (SDValue Op = BuildSDIV(N))
4246       return Op;
4247 
4248   return SDValue();
4249 }
4250 
4251 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4252   SDValue N0 = N->getOperand(0);
4253   SDValue N1 = N->getOperand(1);
4254   EVT VT = N->getValueType(0);
4255   EVT CCVT = getSetCCResultType(VT);
4256 
4257   // fold vector ops
4258   if (VT.isVector())
4259     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4260       return FoldedVOp;
4261 
4262   SDLoc DL(N);
4263 
4264   // fold (udiv c1, c2) -> c1/c2
4265   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4266   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4267     return C;
4268 
4269   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4270   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4271     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4272                          DAG.getConstant(1, DL, VT),
4273                          DAG.getConstant(0, DL, VT));
4274 
4275   if (SDValue V = simplifyDivRem(N, DAG))
4276     return V;
4277 
4278   if (SDValue NewSel = foldBinOpIntoSelect(N))
4279     return NewSel;
4280 
4281   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4282     // If the corresponding remainder node exists, update its users with
4283     // (Dividend - (Quotient * Divisor)).
4284     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4285                                               { N0, N1 })) {
4286       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4287       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4288       AddToWorklist(Mul.getNode());
4289       AddToWorklist(Sub.getNode());
4290       CombineTo(RemNode, Sub);
4291     }
4292     return V;
4293   }
4294 
4295   // udiv, urem -> udivrem
4296   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4297   // true.  Otherwise, we break the simplification logic in visitREM().
4298   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4299   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4300     if (SDValue DivRem = useDivRem(N))
4301         return DivRem;
4302 
4303   return SDValue();
4304 }
4305 
4306 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4307   SDLoc DL(N);
4308   EVT VT = N->getValueType(0);
4309 
4310   // fold (udiv x, (1 << c)) -> x >>u c
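  // e.g. (udiv x, 16) -> (srl x, 4).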
4311   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4312       DAG.isKnownToBeAPowerOfTwo(N1)) {
4313     SDValue LogBase2 = BuildLogBase2(N1, DL);
4314     AddToWorklist(LogBase2.getNode());
4315 
4316     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4317     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4318     AddToWorklist(Trunc.getNode());
4319     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4320   }
4321 
4322   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
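  // e.g. (udiv x, (shl 4, y)) -> (srl x, (add y, 2)).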
4323   if (N1.getOpcode() == ISD::SHL) {
4324     SDValue N10 = N1.getOperand(0);
4325     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4326         DAG.isKnownToBeAPowerOfTwo(N10)) {
4327       SDValue LogBase2 = BuildLogBase2(N10, DL);
4328       AddToWorklist(LogBase2.getNode());
4329 
4330       EVT ADDVT = N1.getOperand(1).getValueType();
4331       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4332       AddToWorklist(Trunc.getNode());
4333       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4334       AddToWorklist(Add.getNode());
4335       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4336     }
4337   }
4338 
4339   // fold (udiv x, c) -> alternate
4340   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4341   if (isConstantOrConstantVector(N1) &&
4342       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4343     if (SDValue Op = BuildUDIV(N))
4344       return Op;
4345 
4346   return SDValue();
4347 }
4348 
4349 // Handles ISD::SREM and ISD::UREM.
4350 SDValue DAGCombiner::visitREM(SDNode *N) {
4351   unsigned Opcode = N->getOpcode();
4352   SDValue N0 = N->getOperand(0);
4353   SDValue N1 = N->getOperand(1);
4354   EVT VT = N->getValueType(0);
4355   EVT CCVT = getSetCCResultType(VT);
4356 
4357   bool isSigned = (Opcode == ISD::SREM);
4358   SDLoc DL(N);
4359 
4360   // fold (rem c1, c2) -> c1%c2
4361   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4362   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4363     return C;
4364 
4365   // fold (urem X, -1) -> select(X == -1, 0, X)
4366   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4367     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4368                          DAG.getConstant(0, DL, VT), N0);
4369 
4370   if (SDValue V = simplifyDivRem(N, DAG))
4371     return V;
4372 
4373   if (SDValue NewSel = foldBinOpIntoSelect(N))
4374     return NewSel;
4375 
4376   if (isSigned) {
4377     // If we know the sign bits of both operands are zero, strength reduce to a
4378     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4379     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4380       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4381   } else {
4382     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4383       // fold (urem x, pow2) -> (and x, pow2-1)
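      // e.g. (urem x, 16) -> (and x, 15).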
4384       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4385       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4386       AddToWorklist(Add.getNode());
4387       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4388     }
4389     if (N1.getOpcode() == ISD::SHL &&
4390         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4391       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4392       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4393       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4394       AddToWorklist(Add.getNode());
4395       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4396     }
4397   }
4398 
4399   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4400 
4401   // If X/C can be simplified by the division-by-constant logic, lower
4402   // X%C to the equivalent of X-X/C*C.
4403   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4404   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4405   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4406   // combine will not return a DIVREM.  Regardless, checking cheapness here
4407   // makes sense since the simplification results in fatter code.
4408   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4409     SDValue OptimizedDiv =
4410         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4411     if (OptimizedDiv.getNode()) {
4412       // If the equivalent Div node also exists, update its users.
4413       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4414       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4415                                                 { N0, N1 }))
4416         CombineTo(DivNode, OptimizedDiv);
4417       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4418       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4419       AddToWorklist(OptimizedDiv.getNode());
4420       AddToWorklist(Mul.getNode());
4421       return Sub;
4422     }
4423   }
4424 
4425   // sdiv, srem -> sdivrem / udiv, urem -> udivrem
4426   if (SDValue DivRem = useDivRem(N))
4427     return DivRem.getValue(1);
4428 
4429   return SDValue();
4430 }
4431 
4432 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4433   SDValue N0 = N->getOperand(0);
4434   SDValue N1 = N->getOperand(1);
4435   EVT VT = N->getValueType(0);
4436   SDLoc DL(N);
4437 
4438   if (VT.isVector()) {
4439     // fold (mulhs x, 0) -> 0
4440     // Do not return N0/N1, because an undef element may be present.
4441     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4442         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4443       return DAG.getConstant(0, DL, VT);
4444   }
4445 
4446   // fold (mulhs x, 0) -> 0
4447   if (isNullConstant(N1))
4448     return N1;
4449   // fold (mulhs x, 1) -> (sra x, size(x)-1)
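  // The high half of x * 1 is the sign extension of x, which is just the sign
  // bit of x replicated across the type.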
4450   if (isOneConstant(N1))
4451     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4452                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4453                                        getShiftAmountTy(N0.getValueType())));
4454 
4455   // fold (mulhs x, undef) -> 0
4456   if (N0.isUndef() || N1.isUndef())
4457     return DAG.getConstant(0, DL, VT);
4458 
4459   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4460   // plus a shift.
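  // e.g. for i16: (mulhs x, y) -> (trunc (srl (mul (sext x), (sext y)), 16))
  // with the multiply performed in i32.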
4461   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4462       !VT.isVector()) {
4463     MVT Simple = VT.getSimpleVT();
4464     unsigned SimpleSize = Simple.getSizeInBits();
4465     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4466     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4467       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4468       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4469       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4470       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4471             DAG.getConstant(SimpleSize, DL,
4472                             getShiftAmountTy(N1.getValueType())));
4473       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4474     }
4475   }
4476 
4477   return SDValue();
4478 }
4479 
4480 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4481   SDValue N0 = N->getOperand(0);
4482   SDValue N1 = N->getOperand(1);
4483   EVT VT = N->getValueType(0);
4484   SDLoc DL(N);
4485 
4486   if (VT.isVector()) {
4487     // fold (mulhu x, 0) -> 0
4488     // Do not return N0/N1, because an undef element may be present.
4489     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
4490         ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4491       return DAG.getConstant(0, DL, VT);
4492   }
4493 
4494   // fold (mulhu x, 0) -> 0
4495   if (isNullConstant(N1))
4496     return N1;
4497   // fold (mulhu x, 1) -> 0
4498   if (isOneConstant(N1))
4499     return DAG.getConstant(0, DL, N0.getValueType());
4500   // fold (mulhu x, undef) -> 0
4501   if (N0.isUndef() || N1.isUndef())
4502     return DAG.getConstant(0, DL, VT);
4503 
4504   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
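  // e.g. for i32: (mulhu x, 16) -> (srl x, 28); the full 64-bit product is
  // x << 4, and its high 32 bits are x >> 28.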
4505   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4506       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4507     unsigned NumEltBits = VT.getScalarSizeInBits();
4508     SDValue LogBase2 = BuildLogBase2(N1, DL);
4509     SDValue SRLAmt = DAG.getNode(
4510         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4511     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4512     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4513     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4514   }
4515 
4516   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4517   // plus a shift.
4518   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4519       !VT.isVector()) {
4520     MVT Simple = VT.getSimpleVT();
4521     unsigned SimpleSize = Simple.getSizeInBits();
4522     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4523     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4524       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4525       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4526       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4527       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4528             DAG.getConstant(SimpleSize, DL,
4529                             getShiftAmountTy(N1.getValueType())));
4530       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4531     }
4532   }
4533 
4534   return SDValue();
4535 }
4536 
4537 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4538 /// give the opcodes for the two computations that are being performed. Returns
4539 /// the simplified value if a combine was made, or an empty SDValue otherwise.
4540 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4541                                                 unsigned HiOp) {
4542   // If the high half is not needed, just compute the low half.
4543   bool HiExists = N->hasAnyUseOfValue(1);
4544   if (!HiExists && (!LegalOperations ||
4545                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4546     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4547     return CombineTo(N, Res, Res);
4548   }
4549 
4550   // If the low half is not needed, just compute the high half.
4551   bool LoExists = N->hasAnyUseOfValue(0);
4552   if (!LoExists && (!LegalOperations ||
4553                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4554     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4555     return CombineTo(N, Res, Res);
4556   }
4557 
4558   // If both halves are used, leave the node as it is.
4559   if (LoExists && HiExists)
4560     return SDValue();
4561 
4562   // If the two computed results can be simplified separately, separate them.
4563   if (LoExists) {
4564     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4565     AddToWorklist(Lo.getNode());
4566     SDValue LoOpt = combine(Lo.getNode());
4567     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4568         (!LegalOperations ||
4569          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4570       return CombineTo(N, LoOpt, LoOpt);
4571   }
4572 
4573   if (HiExists) {
4574     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4575     AddToWorklist(Hi.getNode());
4576     SDValue HiOpt = combine(Hi.getNode());
4577     if (HiOpt.getNode() && HiOpt != Hi &&
4578         (!LegalOperations ||
4579          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4580       return CombineTo(N, HiOpt, HiOpt);
4581   }
4582 
4583   return SDValue();
4584 }
4585 
4586 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4587   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4588     return Res;
4589 
4590   EVT VT = N->getValueType(0);
4591   SDLoc DL(N);
4592 
4593   // If the type twice as wide is legal, transform the smul_lohi to a wider
4594   // multiply plus a shift.
4595   if (VT.isSimple() && !VT.isVector()) {
4596     MVT Simple = VT.getSimpleVT();
4597     unsigned SimpleSize = Simple.getSizeInBits();
4598     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4599     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4600       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4601       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4602       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4603       // Compute the high part (result value #1).
4604       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4605             DAG.getConstant(SimpleSize, DL,
4606                             getShiftAmountTy(Lo.getValueType())));
4607       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4608       // Compute the low part (result value #0).
4609       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4610       return CombineTo(N, Lo, Hi);
4611     }
4612   }
4613 
4614   return SDValue();
4615 }
4616 
4617 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4618   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4619     return Res;
4620 
4621   EVT VT = N->getValueType(0);
4622   SDLoc DL(N);
4623 
4624   // (umul_lohi N0, 0) -> (0, 0)
4625   if (isNullConstant(N->getOperand(1))) {
4626     SDValue Zero = DAG.getConstant(0, DL, VT);
4627     return CombineTo(N, Zero, Zero);
4628   }
4629 
4630   // (umul_lohi N0, 1) -> (N0, 0)
4631   if (isOneConstant(N->getOperand(1))) {
4632     SDValue Zero = DAG.getConstant(0, DL, VT);
4633     return CombineTo(N, N->getOperand(0), Zero);
4634   }
4635 
4636   // If the type twice as wide is legal, transform the umul_lohi to a wider
4637   // multiply plus a shift.
4638   if (VT.isSimple() && !VT.isVector()) {
4639     MVT Simple = VT.getSimpleVT();
4640     unsigned SimpleSize = Simple.getSizeInBits();
4641     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4642     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4643       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4644       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4645       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4646       // Compute the high part (result value #1).
4647       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4648             DAG.getConstant(SimpleSize, DL,
4649                             getShiftAmountTy(Lo.getValueType())));
4650       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4651       // Compute the low part (result value #0).
4652       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4653       return CombineTo(N, Lo, Hi);
4654     }
4655   }
4656 
4657   return SDValue();
4658 }
4659 
4660 SDValue DAGCombiner::visitMULO(SDNode *N) {
4661   SDValue N0 = N->getOperand(0);
4662   SDValue N1 = N->getOperand(1);
4663   EVT VT = N0.getValueType();
4664   bool IsSigned = (ISD::SMULO == N->getOpcode());
4665 
4666   EVT CarryVT = N->getValueType(1);
4667   SDLoc DL(N);
4668 
4669   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4670   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4671 
4672   // fold operation with constant operands.
4673   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
4674   // multiple results.
4675   if (N0C && N1C) {
4676     bool Overflow;
4677     APInt Result =
4678         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
4679                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
4680     return CombineTo(N, DAG.getConstant(Result, DL, VT),
4681                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
4682   }
4683 
4684   // canonicalize constant to RHS.
4685   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4686       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4687     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4688 
4689   // fold (mulo x, 0) -> 0 + no carry out
4690   if (isNullOrNullSplat(N1))
4691     return CombineTo(N, DAG.getConstant(0, DL, VT),
4692                      DAG.getConstant(0, DL, CarryVT));
4693 
4694   // (mulo x, 2) -> (addo x, x)
4695   if (N1C && N1C->getAPIntValue() == 2)
4696     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4697                        N->getVTList(), N0, N0);
4698 
4699   if (IsSigned) {
4700     // A 1 bit SMULO overflows if both inputs are 1.
4701     if (VT.getScalarSizeInBits() == 1) {
4702       SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
4703       return CombineTo(N, And,
4704                        DAG.getSetCC(DL, CarryVT, And,
4705                                     DAG.getConstant(0, DL, VT), ISD::SETNE));
4706     }
4707 
4708     // Multiplying n * m significant bits yields a result of n + m significant
4709     // bits. If the total number of significant bits does not exceed the
4710     // result bit width (minus 1), there is no overflow.
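    // e.g. for i32, operands with 17+ sign bits each fit in 16 signed bits, so
    // their product needs at most 31 bits and cannot overflow.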
4711     unsigned SignBits = DAG.ComputeNumSignBits(N0);
4712     if (SignBits > 1)
4713       SignBits += DAG.ComputeNumSignBits(N1);
4714     if (SignBits > VT.getScalarSizeInBits() + 1)
4715       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4716                        DAG.getConstant(0, DL, CarryVT));
4717   } else {
4718     KnownBits N1Known = DAG.computeKnownBits(N1);
4719     KnownBits N0Known = DAG.computeKnownBits(N0);
4720     bool Overflow;
4721     (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow);
4722     if (!Overflow)
4723       return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
4724                        DAG.getConstant(0, DL, CarryVT));
4725   }
4726 
4727   return SDValue();
4728 }
4729 
4730 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4731   SDValue N0 = N->getOperand(0);
4732   SDValue N1 = N->getOperand(1);
4733   EVT VT = N0.getValueType();
4734   unsigned Opcode = N->getOpcode();
4735 
4736   // fold vector ops
4737   if (VT.isVector())
4738     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4739       return FoldedVOp;
4740 
4741   // fold operation with constant operands.
4742   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4743     return C;
4744 
4745   // canonicalize constant to RHS
4746   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4747       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4748     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4749 
4750   // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4751   // Only do this if the current op isn't legal and the flipped is.
4752   if (!TLI.isOperationLegal(Opcode, VT) &&
4753       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4754       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4755     unsigned AltOpcode;
4756     switch (Opcode) {
4757     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4758     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4759     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4760     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4761     default: llvm_unreachable("Unknown MINMAX opcode");
4762     }
4763     if (TLI.isOperationLegal(AltOpcode, VT))
4764       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4765   }
4766 
4767   // Simplify the operands using demanded-bits information.
4768   if (SimplifyDemandedBits(SDValue(N, 0)))
4769     return SDValue(N, 0);
4770 
4771   return SDValue();
4772 }
4773 
4774 /// If this is a bitwise logic instruction and both operands have the same
4775 /// opcode, try to sink the other opcode after the logic instruction.
4776 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4777   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4778   EVT VT = N0.getValueType();
4779   unsigned LogicOpcode = N->getOpcode();
4780   unsigned HandOpcode = N0.getOpcode();
4781   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4782           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4783   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4784 
4785   // Bail early if none of these transforms apply.
4786   if (N0.getNumOperands() == 0)
4787     return SDValue();
4788 
4789   // FIXME: We should check number of uses of the operands to not increase
4790   //        the instruction count for all transforms.
4791 
4792   // Handle size-changing casts.
4793   SDValue X = N0.getOperand(0);
4794   SDValue Y = N1.getOperand(0);
4795   EVT XVT = X.getValueType();
4796   SDLoc DL(N);
4797   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4798       HandOpcode == ISD::SIGN_EXTEND) {
4799     // If both operands have other uses, this transform would create extra
4800     // instructions without eliminating anything.
4801     if (!N0.hasOneUse() && !N1.hasOneUse())
4802       return SDValue();
4803     // We need matching integer source types.
4804     if (XVT != Y.getValueType())
4805       return SDValue();
4806     // Don't create an illegal op during or after legalization. Don't ever
4807     // create an unsupported vector op.
4808     if ((VT.isVector() || LegalOperations) &&
4809         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4810       return SDValue();
4811     // Avoid infinite looping with PromoteIntBinOp.
4812     // TODO: Should we apply desirable/legal constraints to all opcodes?
4813     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4814         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4815       return SDValue();
4816     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4817     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4818     return DAG.getNode(HandOpcode, DL, VT, Logic);
4819   }
4820 
4821   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4822   if (HandOpcode == ISD::TRUNCATE) {
4823     // If both operands have other uses, this transform would create extra
4824     // instructions without eliminating anything.
4825     if (!N0.hasOneUse() && !N1.hasOneUse())
4826       return SDValue();
4827     // We need matching source types.
4828     if (XVT != Y.getValueType())
4829       return SDValue();
4830     // Don't create an illegal op during or after legalization.
4831     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4832       return SDValue();
4833     // Be extra careful sinking truncate. If it's free, there's no benefit in
4834     // widening a binop. Also, don't create a logic op on an illegal type.
4835     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4836       return SDValue();
4837     if (!TLI.isTypeLegal(XVT))
4838       return SDValue();
4839     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4840     return DAG.getNode(HandOpcode, DL, VT, Logic);
4841   }
4842 
4843   // For binops SHL/SRL/SRA/AND:
4844   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4845   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4846        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4847       N0.getOperand(1) == N1.getOperand(1)) {
4848     // If either operand has other uses, this transform is not an improvement.
4849     if (!N0.hasOneUse() || !N1.hasOneUse())
4850       return SDValue();
4851     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4852     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4853   }
4854 
4855   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4856   if (HandOpcode == ISD::BSWAP) {
4857     // If either operand has other uses, this transform is not an improvement.
4858     if (!N0.hasOneUse() || !N1.hasOneUse())
4859       return SDValue();
4860     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4861     return DAG.getNode(HandOpcode, DL, VT, Logic);
4862   }
4863 
4864   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4865   // Only perform this optimization up until type legalization, before
4866   // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4867   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4868   // we don't want to undo this promotion.
4869   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4870   // on scalars.
4871   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4872        Level <= AfterLegalizeTypes) {
4873     // Input types must be integer and the same.
4874     if (XVT.isInteger() && XVT == Y.getValueType() &&
4875         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4876           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4877       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4878       return DAG.getNode(HandOpcode, DL, VT, Logic);
4879     }
4880   }
4881 
4882   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4883   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4884   // If both shuffles use the same mask, and both shuffle within a single
4885   // vector, then it is worthwhile to move the swizzle after the operation.
4886   // The type-legalizer generates this pattern when loading illegal
4887   // vector types from memory. In many cases this allows additional shuffle
4888   // optimizations.
4889   // There are other cases where moving the shuffle after the xor/and/or
4890   // is profitable even if shuffles don't perform a swizzle.
4891   // If both shuffles use the same mask, and both shuffles have the same first
4892   // or second operand, then it might still be profitable to move the shuffle
4893   // after the xor/and/or operation.
4894   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4895     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4896     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4897     assert(X.getValueType() == Y.getValueType() &&
4898            "Inputs to shuffles are not the same type");
4899 
4900     // Check that both shuffles use the same mask. The masks are known to be of
4901     // the same length because the result vector type is the same.
4902     // Check also that shuffles have only one use to avoid introducing extra
4903     // instructions.
4904     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4905         !SVN0->getMask().equals(SVN1->getMask()))
4906       return SDValue();
4907 
4908     // Don't try to fold this node if it requires introducing a
4909     // build vector of all zeros that might be illegal at this stage.
4910     SDValue ShOp = N0.getOperand(1);
4911     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4912       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4913 
4914     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4915     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4916       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4917                                   N0.getOperand(0), N1.getOperand(0));
4918       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4919     }
4920 
4921     // Don't try to fold this node if it requires introducing a
4922     // build vector of all zeros that might be illegal at this stage.
4923     ShOp = N0.getOperand(0);
4924     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4925       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4926 
4927     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4928     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4929       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4930                                   N1.getOperand(1));
4931       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4932     }
4933   }
4934 
4935   return SDValue();
4936 }
4937 
4938 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4939 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4940                                        const SDLoc &DL) {
4941   SDValue LL, LR, RL, RR, N0CC, N1CC;
4942   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4943       !isSetCCEquivalent(N1, RL, RR, N1CC))
4944     return SDValue();
4945 
4946   assert(N0.getValueType() == N1.getValueType() &&
4947          "Unexpected operand types for bitwise logic op");
4948   assert(LL.getValueType() == LR.getValueType() &&
4949          RL.getValueType() == RR.getValueType() &&
4950          "Unexpected operand types for setcc");
4951 
4952   // If we're here post-legalization or the logic op type is not i1, the logic
4953   // op type must match a setcc result type. Also, all folds require new
4954   // operations on the left and right operands, so those types must match.
4955   EVT VT = N0.getValueType();
4956   EVT OpVT = LL.getValueType();
4957   if (LegalOperations || VT.getScalarType() != MVT::i1)
4958     if (VT != getSetCCResultType(OpVT))
4959       return SDValue();
4960   if (OpVT != RL.getValueType())
4961     return SDValue();
4962 
4963   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4964   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4965   bool IsInteger = OpVT.isInteger();
4966   if (LR == RR && CC0 == CC1 && IsInteger) {
4967     bool IsZero = isNullOrNullSplat(LR);
4968     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4969 
4970     // All bits clear?
4971     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4972     // All sign bits clear?
4973     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4974     // Any bits set?
4975     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4976     // Any sign bits set?
4977     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4978 
4979     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4980     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4981     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4982     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4983     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4984       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4985       AddToWorklist(Or.getNode());
4986       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4987     }
4988 
4989     // All bits set?
4990     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4991     // All sign bits set?
4992     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4993     // Any bits clear?
4994     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4995     // Any sign bits clear?
4996     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4997 
4998     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4999     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5000     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
5001     // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5002     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5003       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5004       AddToWorklist(And.getNode());
5005       return DAG.getSetCC(DL, VT, And, LR, CC1);
5006     }
5007   }
5008 
5009   // TODO: What is the 'or' equivalent of this fold?
5010   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
5011   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
5012       IsInteger && CC0 == ISD::SETNE &&
5013       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
5014        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
5015     SDValue One = DAG.getConstant(1, DL, OpVT);
5016     SDValue Two = DAG.getConstant(2, DL, OpVT);
5017     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
5018     AddToWorklist(Add.getNode());
5019     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
5020   }
5021 
5022   // Try more general transforms if the predicates match and the only user of
5023   // the compares is the 'and' or 'or'.
5024   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
5025       N0.hasOneUse() && N1.hasOneUse()) {
5026     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
5027     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
5028     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
5029       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
5030       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
5031       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
5032       SDValue Zero = DAG.getConstant(0, DL, OpVT);
5033       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
5034     }
5035 
5036     // Turn a compare of constants whose difference is 1 bit into sub+and+setcc.
5037     // TODO - support non-uniform vector amounts.
5038     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
5039       // Match a shared variable operand and 2 non-opaque constant operands.
5040       ConstantSDNode *C0 = isConstOrConstSplat(LR);
5041       ConstantSDNode *C1 = isConstOrConstSplat(RR);
5042       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
5043         const APInt &CMax =
5044             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
5045         const APInt &CMin =
5046             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
5047         // The difference of the constants must be a single bit.
5048         if ((CMax - CMin).isPowerOf2()) {
5049           // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
5050           // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
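          // e.g. and (setcc X, 6, ne), (setcc X, 4, ne)
          //        --> setcc (and (sub X, 4), ~2), 0, ne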
5051           SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
5052           SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
5053           SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
5054           SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
5055           SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
5056           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
5057           SDValue Zero = DAG.getConstant(0, DL, OpVT);
5058           return DAG.getSetCC(DL, VT, And, Zero, CC0);
5059         }
5060       }
5061     }
5062   }
5063 
5064   // Canonicalize equivalent operands to LL == RL.
5065   if (LL == RR && LR == RL) {
5066     CC1 = ISD::getSetCCSwappedOperands(CC1);
5067     std::swap(RL, RR);
5068   }
5069 
5070   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5071   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
5072   if (LL == RL && LR == RR) {
5073     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
5074                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
5075     if (NewCC != ISD::SETCC_INVALID &&
5076         (!LegalOperations ||
5077          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
5078           TLI.isOperationLegal(ISD::SETCC, OpVT))))
5079       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
5080   }
5081 
5082   return SDValue();
5083 }
5084 
5085 /// This contains all DAGCombine rules which reduce two values combined by
5086 /// an And operation to a single value. This makes them reusable in the context
5087 /// of visitSELECT(). Rules involving constants are not included as
5088 /// visitSELECT() already handles those cases.
5089 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
5090   EVT VT = N1.getValueType();
5091   SDLoc DL(N);
5092 
5093   // fold (and x, undef) -> 0
5094   if (N0.isUndef() || N1.isUndef())
5095     return DAG.getConstant(0, DL, VT);
5096 
5097   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
5098     return V;
5099 
5100   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
5101       VT.getSizeInBits() <= 64) {
5102     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5103       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
5104         // Look for (and (add x, C1), (lshr y, C2)). If C1 wasn't a legal
5105         // immediate for an add, but it is legal if its top C2 bits are set,
5106         // transform the ADD so the immediate doesn't need to be materialized
5107         // in a register.
5108         APInt ADDC = ADDI->getAPIntValue();
5109         APInt SRLC = SRLI->getAPIntValue();
5110         if (ADDC.getMinSignedBits() <= 64 &&
5111             SRLC.ult(VT.getSizeInBits()) &&
5112             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5113           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
5114                                              SRLC.getZExtValue());
5115           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
5116             ADDC |= Mask;
5117             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
5118               SDLoc DL0(N0);
5119               SDValue NewAdd =
5120                 DAG.getNode(ISD::ADD, DL0, VT,
5121                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
5122               CombineTo(N0.getNode(), NewAdd);
5123               // Return N so it doesn't get rechecked!
5124               return SDValue(N, 0);
5125             }
5126           }
5127         }
5128       }
5129     }
5130   }
5131 
5132   // Reduce bit extract of low half of an integer to the narrower type.
5133   // (and (srl i64:x, K), KMask) ->
5134   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
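  // e.g. (and (srl i64:x, 8), 0xffff) can be computed as
  //   (zero_extend (and (srl (trunc x to i32), 8), 0xffff)).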
5135   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
5136     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
5137       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5138         unsigned Size = VT.getSizeInBits();
5139         const APInt &AndMask = CAnd->getAPIntValue();
5140         unsigned ShiftBits = CShift->getZExtValue();
5141 
5142         // Bail out, this node will probably disappear anyway.
5143         if (ShiftBits == 0)
5144           return SDValue();
5145 
5146         unsigned MaskBits = AndMask.countTrailingOnes();
5147         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
5148 
5149         if (AndMask.isMask() &&
5150             // Required bits must not span the two halves of the integer and
5151             // must fit in the half size type.
5152             (ShiftBits + MaskBits <= Size / 2) &&
5153             TLI.isNarrowingProfitable(VT, HalfVT) &&
5154             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5155             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5156             TLI.isTruncateFree(VT, HalfVT) &&
5157             TLI.isZExtFree(HalfVT, VT)) {
5158           // The isNarrowingProfitable check is to avoid regressions on PPC and
5159           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5160           // on downstream users of this. Those patterns could probably be
5161           // extended to handle extensions mixed in.
5162 
5163           SDLoc SL(N0);
5164           assert(MaskBits <= Size);
5165 
5166           // Extracting the highest bit of the low half.
5167           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5168           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5169                                       N0.getOperand(0));
5170 
5171           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5172           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5173           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5174           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5175           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5176         }
5177       }
5178     }
5179   }
5180 
5181   return SDValue();
5182 }
5183 
5184 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5185                                    EVT LoadResultTy, EVT &ExtVT) {
5186   if (!AndC->getAPIntValue().isMask())
5187     return false;
5188 
5189   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5190 
5191   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5192   EVT LoadedVT = LoadN->getMemoryVT();
5193 
5194   if (ExtVT == LoadedVT &&
5195       (!LegalOperations ||
5196        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5197     // ZEXTLOAD will match without needing to change the size of the value being
5198     // loaded.
5199     return true;
5200   }
5201 
5202   // Do not change the width of volatile or atomic loads.
5203   if (!LoadN->isSimple())
5204     return false;
5205 
5206   // Do not generate loads of non-round integer types since these can
5207   // be expensive (and would be wrong if the type is not byte sized).
5208   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5209     return false;
5210 
5211   if (LegalOperations &&
5212       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5213     return false;
5214 
5215   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5216     return false;
5217 
5218   return true;
5219 }
5220 
5221 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5222                                     ISD::LoadExtType ExtType, EVT &MemVT,
5223                                     unsigned ShAmt) {
5224   if (!LDST)
5225     return false;
5226   // Only allow byte offsets.
5227   if (ShAmt % 8)
5228     return false;
5229 
5230   // Do not generate loads of non-round integer types since these can
5231   // be expensive (and would be wrong if the type is not byte sized).
5232   if (!MemVT.isRound())
5233     return false;
5234 
5235   // Don't change the width of volatile or atomic loads.
5236   if (!LDST->isSimple())
5237     return false;
5238 
5239   EVT LdStMemVT = LDST->getMemoryVT();
5240 
5241   // Bail out when changing the scalable property, since we can't be sure that
5242   // we're actually narrowing here.
5243   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5244     return false;
5245 
5246   // Verify that we are actually reducing a load width here.
5247   if (LdStMemVT.bitsLT(MemVT))
5248     return false;
5249 
5250   // Ensure that this isn't going to produce an unsupported memory access.
5251   if (ShAmt) {
5252     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5253     const unsigned ByteShAmt = ShAmt / 8;
5254     const Align LDSTAlign = LDST->getAlign();
5255     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5256     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5257                                 LDST->getAddressSpace(), NarrowAlign,
5258                                 LDST->getMemOperand()->getFlags()))
5259       return false;
5260   }
5261 
5262   // It's not possible to generate a constant of extended or untyped type.
5263   EVT PtrType = LDST->getBasePtr().getValueType();
5264   if (PtrType == MVT::Untyped || PtrType.isExtended())
5265     return false;
5266 
5267   if (isa<LoadSDNode>(LDST)) {
5268     LoadSDNode *Load = cast<LoadSDNode>(LDST);
5269     // Don't transform one with multiple uses; this would require adding a new
5270     // load.
5271     if (!SDValue(Load, 0).hasOneUse())
5272       return false;
5273 
5274     if (LegalOperations &&
5275         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5276       return false;
5277 
5278     // For the transform to be legal, the load must produce only two values
5279     // (the value loaded and the chain).  Don't transform a pre-increment
5280     // load, for example, which produces an extra value.  Otherwise the
5281     // transformation is not equivalent, and the downstream logic to replace
5282     // uses gets things wrong.
5283     if (Load->getNumValues() > 2)
5284       return false;
5285 
5286     // If the load that we're shrinking is an extload and we're not just
5287     // discarding the extension we can't simply shrink the load. Bail.
5288     // TODO: It would be possible to merge the extensions in some cases.
5289     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5290         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5291       return false;
5292 
5293     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5294       return false;
5295   } else {
    assert(isa<StoreSDNode>(LDST) && "It is neither a Load nor a Store SDNode");
5297     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5298     // Can't write outside the original store
5299     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5300       return false;
5301 
5302     if (LegalOperations &&
5303         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5304       return false;
5305   }
5306   return true;
5307 }
5308 
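/// Recursively walk the operands of an AND-masked expression tree, collecting
/// in \p Loads any loads that can legally be narrowed under \p Mask and in
/// \p NodesWithConsts any or/xor nodes whose constant operand is not already
/// covered by \p Mask and so will need re-masking. At most one other node may
/// be recorded in \p NodeToMask for explicit masking; any further
/// unrecognized node aborts the search.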
5309 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5310                                     SmallVectorImpl<LoadSDNode*> &Loads,
5311                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5312                                     ConstantSDNode *Mask,
5313                                     SDNode *&NodeToMask) {
5314   // Recursively search for the operands, looking for loads which can be
5315   // narrowed.
5316   for (SDValue Op : N->op_values()) {
5317     if (Op.getValueType().isVector())
5318       return false;
5319 
5320     // Some constants may need fixing up later if they are too large.
5321     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5322       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5323           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5324         NodesWithConsts.insert(N);
5325       continue;
5326     }
5327 
5328     if (!Op.hasOneUse())
5329       return false;
5330 
5331     switch(Op.getOpcode()) {
5332     case ISD::LOAD: {
5333       auto *Load = cast<LoadSDNode>(Op);
5334       EVT ExtVT;
5335       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5336           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5337 
5338         // ZEXTLOAD is already small enough.
5339         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5340             ExtVT.bitsGE(Load->getMemoryVT()))
5341           continue;
5342 
        // Use LE so that equal-sized loads are also converted to zext loads.
5344         if (ExtVT.bitsLE(Load->getMemoryVT()))
5345           Loads.push_back(Load);
5346 
5347         continue;
5348       }
5349       return false;
5350     }
5351     case ISD::ZERO_EXTEND:
5352     case ISD::AssertZext: {
5353       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5354       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5355       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5356         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5357         Op.getOperand(0).getValueType();
5358 
      // We can accept extending nodes if the mask is wider than, or equal in
      // width to, the original type.
5361       if (ExtVT.bitsGE(VT))
5362         continue;
5363       break;
5364     }
5365     case ISD::OR:
5366     case ISD::XOR:
5367     case ISD::AND:
5368       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5369                              NodeToMask))
5370         return false;
5371       continue;
5372     }
5373 
    // Allow one node which will be masked along with any loads found.
5375     if (NodeToMask)
5376       return false;
5377 
5378     // Also ensure that the node to be masked only produces one data result.
5379     NodeToMask = Op.getNode();
5380     if (NodeToMask->getNumValues() > 1) {
5381       bool HasValue = false;
5382       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5383         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5384         if (VT != MVT::Glue && VT != MVT::Other) {
5385           if (HasValue) {
5386             NodeToMask = nullptr;
5387             return false;
5388           }
5389           HasValue = true;
5390         }
5391       }
5392       assert(HasValue && "Node to be masked has no data result?");
5393     }
5394   }
5395   return true;
5396 }
5397 
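/// Try to remove a masking AND by propagating the mask back to the leaves of
/// the expression and narrowing the loads found there. Illustrative sketch:
///   (and (or (load i32 x), (load i32 y)), 0xFF)
/// can become (or (zextload i8 x), (zextload i8 y)) with the AND removed,
/// provided the target supports the narrower extending loads.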
5398 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5399   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5400   if (!Mask)
5401     return false;
5402 
5403   if (!Mask->getAPIntValue().isMask())
5404     return false;
5405 
5406   // No need to do anything if the and directly uses a load.
5407   if (isa<LoadSDNode>(N->getOperand(0)))
5408     return false;
5409 
5410   SmallVector<LoadSDNode*, 8> Loads;
5411   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5412   SDNode *FixupNode = nullptr;
5413   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5414     if (Loads.size() == 0)
5415       return false;
5416 
5417     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5418     SDValue MaskOp = N->getOperand(1);
5419 
    // If it exists, fix up the single node we allow in the tree that needs
    // masking.
5422     if (FixupNode) {
5423       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5424       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5425                                 FixupNode->getValueType(0),
5426                                 SDValue(FixupNode, 0), MaskOp);
5427       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
5429         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5430     }
5431 
5432     // Narrow any constants that need it.
5433     for (auto *LogicN : NodesWithConsts) {
5434       SDValue Op0 = LogicN->getOperand(0);
5435       SDValue Op1 = LogicN->getOperand(1);
5436 
      if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);
5439 
5440       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5441                                 Op1, MaskOp);
5442 
5443       DAG.UpdateNodeOperands(LogicN, Op0, And);
5444     }
5445 
5446     // Create narrow loads.
5447     for (auto *Load : Loads) {
5448       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5449       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5450                                 SDValue(Load, 0), MaskOp);
5451       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
5453         And = SDValue(
5454             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5455       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5456       assert(NewLoad &&
5457              "Shouldn't be masking the load if it can't be narrowed");
5458       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5459     }
5460     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5461     return true;
5462   }
5463   return false;
5464 }
5465 
5466 // Unfold
5467 //    x &  (-1 'logical shift' y)
5468 // To
5469 //    (x 'opposite logical shift' y) 'logical shift' y
5470 // if it is better for performance.
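//
// For example, with a left shift creating the mask:
//    (and x, (shl -1, y)) --> (shl (srl x, y), y)
// clears the low y bits of x without materializing the variable mask.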
5471 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5472   assert(N->getOpcode() == ISD::AND);
5473 
5474   SDValue N0 = N->getOperand(0);
5475   SDValue N1 = N->getOperand(1);
5476 
5477   // Do we actually prefer shifts over mask?
5478   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5479     return SDValue();
5480 
5481   // Try to match  (-1 '[outer] logical shift' y)
5482   unsigned OuterShift;
5483   unsigned InnerShift; // The opposite direction to the OuterShift.
5484   SDValue Y;           // Shift amount.
5485   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5486     if (!M.hasOneUse())
5487       return false;
5488     OuterShift = M->getOpcode();
5489     if (OuterShift == ISD::SHL)
5490       InnerShift = ISD::SRL;
5491     else if (OuterShift == ISD::SRL)
5492       InnerShift = ISD::SHL;
5493     else
5494       return false;
5495     if (!isAllOnesConstant(M->getOperand(0)))
5496       return false;
5497     Y = M->getOperand(1);
5498     return true;
5499   };
5500 
5501   SDValue X;
5502   if (matchMask(N1))
5503     X = N0;
5504   else if (matchMask(N0))
5505     X = N1;
5506   else
5507     return SDValue();
5508 
5509   SDLoc DL(N);
5510   EVT VT = N->getValueType(0);
5511 
5512   //     tmp = x   'opposite logical shift' y
5513   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5514   //     ret = tmp 'logical shift' y
5515   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5516 
5517   return T1;
5518 }
5519 
5520 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5521 /// For a target with a bit test, this is expected to become test + set and save
5522 /// at least 1 instruction.
5523 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5524   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5525 
5526   // This is probably not worthwhile without a supported type.
5527   EVT VT = And->getValueType(0);
5528   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5529   if (!TLI.isTypeLegal(VT))
5530     return SDValue();
5531 
5532   // Look through an optional extension and find a 'not'.
5533   // TODO: Should we favor test+set even without the 'not' op?
5534   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5535   if (Not.getOpcode() == ISD::ANY_EXTEND)
5536     Not = Not.getOperand(0);
5537   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5538     return SDValue();
5539 
  // Look through an optional truncation. The source operand may not be the
  // same type as the original 'and', but that is ok because we are masking
  // off everything but the low bit.
5543   SDValue Srl = Not.getOperand(0);
5544   if (Srl.getOpcode() == ISD::TRUNCATE)
5545     Srl = Srl.getOperand(0);
5546 
5547   // Match a shift-right by constant.
5548   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5549       !isa<ConstantSDNode>(Srl.getOperand(1)))
5550     return SDValue();
5551 
5552   // We might have looked through casts that make this transform invalid.
5553   // TODO: If the source type is wider than the result type, do the mask and
5554   //       compare in the source type.
5555   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5556   unsigned VTBitWidth = VT.getSizeInBits();
5557   if (ShiftAmt.uge(VTBitWidth))
5558     return SDValue();
5559 
5560   // Turn this into a bit-test pattern using mask op + setcc:
5561   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5562   SDLoc DL(And);
5563   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5564   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5565   SDValue Mask = DAG.getConstant(
5566       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5567   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5568   SDValue Zero = DAG.getConstant(0, DL, VT);
5569   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5570   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5571 }
5572 
5573 SDValue DAGCombiner::visitAND(SDNode *N) {
5574   SDValue N0 = N->getOperand(0);
5575   SDValue N1 = N->getOperand(1);
5576   EVT VT = N1.getValueType();
5577 
5578   // x & x --> x
5579   if (N0 == N1)
5580     return N0;
5581 
5582   // fold vector ops
5583   if (VT.isVector()) {
5584     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5585       return FoldedVOp;
5586 
5587     // fold (and x, 0) -> 0, vector edition
5588     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
      // do not return N0, because an undef node may exist in N0
5590       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5591                              SDLoc(N), N0.getValueType());
5592     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      // do not return N1, because an undef node may exist in N1
5594       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5595                              SDLoc(N), N1.getValueType());
5596 
5597     // fold (and x, -1) -> x, vector edition
5598     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
5599       return N1;
5600     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
5601       return N0;
5602 
5603     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5604     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5605     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5606     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5607         N0.hasOneUse() && N1.hasOneUse()) {
5608       EVT LoadVT = MLoad->getMemoryVT();
5609       EVT ExtVT = VT;
5610       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5611         // For this AND to be a zero extension of the masked load the elements
5612         // of the BuildVec must mask the bottom bits of the extended element
        // type.
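        // As an illustrative example, an i8->i16 extending masked load ANDed
        // with a splat of 0x00FF is equivalent to an i8->i16 zero-extending
        // masked load.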
5614         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5615           uint64_t ElementSize =
5616               LoadVT.getVectorElementType().getScalarSizeInBits();
5617           if (Splat->getAPIntValue().isMask(ElementSize)) {
5618             return DAG.getMaskedLoad(
5619                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5620                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5621                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5622                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5623           }
5624         }
5625       }
5626     }
5627   }
5628 
5629   // fold (and c1, c2) -> c1&c2
5630   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5631   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5632     return C;
5633 
5634   // canonicalize constant to RHS
5635   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5636       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5637     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5638 
5639   // fold (and x, -1) -> x
5640   if (isAllOnesConstant(N1))
5641     return N0;
5642 
5643   // if (and x, c) is known to be zero, return 0
5644   unsigned BitWidth = VT.getScalarSizeInBits();
5645   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5646                                    APInt::getAllOnesValue(BitWidth)))
5647     return DAG.getConstant(0, SDLoc(N), VT);
5648 
5649   if (SDValue NewSel = foldBinOpIntoSelect(N))
5650     return NewSel;
5651 
5652   // reassociate and
5653   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5654     return RAND;
5655 
5656   // Try to convert a constant mask AND into a shuffle clear mask.
5657   if (VT.isVector())
5658     if (SDValue Shuffle = XformToShuffleWithZero(N))
5659       return Shuffle;
5660 
5661   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5662     return Combined;
5663 
5664   // fold (and (or x, C), D) -> D if (C & D) == D
5665   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5666     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5667   };
5668   if (N0.getOpcode() == ISD::OR &&
5669       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5670     return N1;
5671   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5672   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5673     SDValue N0Op0 = N0.getOperand(0);
5674     APInt Mask = ~N1C->getAPIntValue();
5675     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5676     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5677       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5678                                  N0.getValueType(), N0Op0);
5679 
5680       // Replace uses of the AND with uses of the Zero extend node.
5681       CombineTo(N, Zext);
5682 
5683       // We actually want to replace all uses of the any_extend with the
5684       // zero_extend, to avoid duplicating things.  This will later cause this
5685       // AND to be folded.
5686       CombineTo(N0.getNode(), Zext);
5687       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5688     }
5689   }
5690 
  // Similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5692   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5693   // already be zero by virtue of the width of the base type of the load.
5694   //
  // The 'X' node here can either be nothing or an extract_vector_elt to catch
5696   // more cases.
5697   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5698        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5699        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5700        N0.getOperand(0).getResNo() == 0) ||
5701       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5702     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5703                                          N0 : N0.getOperand(0) );
5704 
    // Get the constant (if applicable) that the zeroth operand is being ANDed
    // with. This can be a pure constant or a vector splat, in which case we
    // treat the vector as a scalar and use the splat value.
5708     APInt Constant = APInt::getNullValue(1);
5709     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5710       Constant = C->getAPIntValue();
5711     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5712       APInt SplatValue, SplatUndef;
5713       unsigned SplatBitSize;
5714       bool HasAnyUndefs;
5715       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5716                                              SplatBitSize, HasAnyUndefs);
5717       if (IsSplat) {
5718         // Undef bits can contribute to a possible optimisation if set, so
5719         // set them.
5720         SplatValue |= SplatUndef;
5721 
5722         // The splat value may be something like "0x00FFFFFF", which means 0 for
5723         // the first vector value and FF for the rest, repeating. We need a mask
5724         // that will apply equally to all members of the vector, so AND all the
5725         // lanes of the constant together.
5726         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5727 
5728         // If the splat value has been compressed to a bitlength lower
5729         // than the size of the vector lane, we need to re-expand it to
5730         // the lane size.
5731         if (EltBitWidth > SplatBitSize)
5732           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5733                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5734             SplatValue |= SplatValue.shl(SplatBitSize);
5735 
        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is
        // a multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
        // value.
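        // Illustrative example: for v4i16 lanes, with SplatBitSize == 32 and
        // SplatValue == 0x00FF00FF, both 16-bit halves are 0x00FF, so
        // Constant becomes 0x00FF for every lane.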
5738         if ((SplatBitSize % EltBitWidth) == 0) {
5739           Constant = APInt::getAllOnesValue(EltBitWidth);
5740           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5741             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5742         }
5743       }
5744     }
5745 
5746     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5747     // actually legal and isn't going to get expanded, else this is a false
5748     // optimisation.
5749     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5750                                                     Load->getValueType(0),
5751                                                     Load->getMemoryVT());
5752 
5753     // Resize the constant to the same size as the original memory access before
5754     // extension. If it is still the AllOnesValue then this AND is completely
5755     // unneeded.
5756     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5757 
5758     bool B;
5759     switch (Load->getExtensionType()) {
5760     default: B = false; break;
5761     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5762     case ISD::ZEXTLOAD:
5763     case ISD::NON_EXTLOAD: B = true; break;
5764     }
5765 
5766     if (B && Constant.isAllOnesValue()) {
5767       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5768       // preserve semantics once we get rid of the AND.
5769       SDValue NewLoad(Load, 0);
5770 
5771       // Fold the AND away. NewLoad may get replaced immediately.
5772       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5773 
5774       if (Load->getExtensionType() == ISD::EXTLOAD) {
5775         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5776                               Load->getValueType(0), SDLoc(Load),
5777                               Load->getChain(), Load->getBasePtr(),
5778                               Load->getOffset(), Load->getMemoryVT(),
5779                               Load->getMemOperand());
5780         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5781         if (Load->getNumValues() == 3) {
5782           // PRE/POST_INC loads have 3 values.
5783           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5784                            NewLoad.getValue(2) };
5785           CombineTo(Load, To, 3, true);
5786         } else {
5787           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5788         }
5789       }
5790 
5791       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5792     }
5793   }
5794 
5795   // fold (and (masked_gather x)) -> (zext_masked_gather x)
5796   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5797     EVT MemVT = GN0->getMemoryVT();
5798     EVT ScalarVT = MemVT.getScalarType();
5799 
5800     if (SDValue(GN0, 0).hasOneUse() &&
5801         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
5803       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
5804                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
5805 
5806       SDValue ZExtLoad = DAG.getMaskedGather(
5807           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5808           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5809 
5810       CombineTo(N, ZExtLoad);
5811       AddToWorklist(ZExtLoad.getNode());
5812       // Avoid recheck of N.
5813       return SDValue(N, 0);
5814     }
5815   }
5816 
5817   // fold (and (load x), 255) -> (zextload x, i8)
5818   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5819   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5820   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5821                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5822                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5823     if (SDValue Res = ReduceLoadWidth(N)) {
5824       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5825         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5826       AddToWorklist(N);
5827       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5828       return SDValue(N, 0);
5829     }
5830   }
5831 
5832   if (LegalTypes) {
5833     // Attempt to propagate the AND back up to the leaves which, if they're
5834     // loads, can be combined to narrow loads and the AND node can be removed.
5835     // Perform after legalization so that extend nodes will already be
5836     // combined into the loads.
5837     if (BackwardsPropagateMask(N))
5838       return SDValue(N, 0);
5839   }
5840 
5841   if (SDValue Combined = visitANDLike(N0, N1, N))
5842     return Combined;
5843 
5844   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5845   if (N0.getOpcode() == N1.getOpcode())
5846     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5847       return V;
5848 
5849   // Masking the negated extension of a boolean is just the zero-extended
5850   // boolean:
5851   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5852   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5853   //
5854   // Note: the SimplifyDemandedBits fold below can make an information-losing
5855   // transform, and then we have no way to find this better fold.
5856   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5857     if (isNullOrNullSplat(N0.getOperand(0))) {
5858       SDValue SubRHS = N0.getOperand(1);
5859       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5860           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5861         return SubRHS;
5862       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5863           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5864         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5865     }
5866   }
5867 
5868   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5869   // fold (and (sra)) -> (and (srl)) when possible.
5870   if (SimplifyDemandedBits(SDValue(N, 0)))
5871     return SDValue(N, 0);
5872 
5873   // fold (zext_inreg (extload x)) -> (zextload x)
5874   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5875   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5876       (ISD::isEXTLoad(N0.getNode()) ||
5877        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5878     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5879     EVT MemVT = LN0->getMemoryVT();
5880     // If we zero all the possible extended bits, then we can turn this into
5881     // a zextload if we are running before legalize or the operation is legal.
5882     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5883     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5884     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5885     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5886         ((!LegalOperations && LN0->isSimple()) ||
5887          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5888       SDValue ExtLoad =
5889           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5890                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5891       AddToWorklist(N);
5892       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5893       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5894     }
5895   }
5896 
5897   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5898   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5899     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5900                                            N0.getOperand(1), false))
5901       return BSwap;
5902   }
5903 
5904   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5905     return Shifts;
5906 
5907   if (TLI.hasBitTest(N0, N1))
5908     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5909       return V;
5910 
5911   // Recognize the following pattern:
5912   //
5913   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5914   //
  // where bitmask is a mask that clears the upper bits of AndVT. The number
  // of set low bits in bitmask must equal the bit width of NarrowVT.
5917   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5918     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5919       return false;
5920 
5921     auto *C = dyn_cast<ConstantSDNode>(RHS);
5922     if (!C)
5923       return false;
5924 
5925     if (!C->getAPIntValue().isMask(
5926             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5927       return false;
5928 
5929     return true;
5930   };
5931 
5932   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5933   if (IsAndZeroExtMask(N0, N1))
5934     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5935 
5936   return SDValue();
5937 }
5938 
5939 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5940 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5941                                         bool DemandHighBits) {
5942   if (!LegalOperations)
5943     return SDValue();
5944 
5945   EVT VT = N->getValueType(0);
5946   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5947     return SDValue();
5948   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5949     return SDValue();
5950 
5951   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5952   bool LookPassAnd0 = false;
5953   bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
5958   if (N0.getOpcode() == ISD::AND) {
5959     if (!N0.getNode()->hasOneUse())
5960       return SDValue();
5961     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5962     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5963     // This is needed for X86.
5964     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5965                   N01C->getZExtValue() != 0xFFFF))
5966       return SDValue();
5967     N0 = N0.getOperand(0);
5968     LookPassAnd0 = true;
5969   }
5970 
5971   if (N1.getOpcode() == ISD::AND) {
5972     if (!N1.getNode()->hasOneUse())
5973       return SDValue();
5974     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5975     if (!N11C || N11C->getZExtValue() != 0xFF)
5976       return SDValue();
5977     N1 = N1.getOperand(0);
5978     LookPassAnd1 = true;
5979   }
5980 
5981   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5982     std::swap(N0, N1);
5983   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5984     return SDValue();
5985   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5986     return SDValue();
5987 
5988   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5989   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5990   if (!N01C || !N11C)
5991     return SDValue();
5992   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5993     return SDValue();
5994 
5995   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5996   SDValue N00 = N0->getOperand(0);
5997   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5998     if (!N00.getNode()->hasOneUse())
5999       return SDValue();
6000     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
6001     if (!N001C || N001C->getZExtValue() != 0xFF)
6002       return SDValue();
6003     N00 = N00.getOperand(0);
6004     LookPassAnd0 = true;
6005   }
6006 
6007   SDValue N10 = N1->getOperand(0);
6008   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
6009     if (!N10.getNode()->hasOneUse())
6010       return SDValue();
6011     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
6012     // Also allow 0xFFFF since the bits will be shifted out. This is needed
6013     // for X86.
6014     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
6015                    N101C->getZExtValue() != 0xFFFF))
6016       return SDValue();
6017     N10 = N10.getOperand(0);
6018     LookPassAnd1 = true;
6019   }
6020 
6021   if (N00 != N10)
6022     return SDValue();
6023 
6024   // Make sure everything beyond the low halfword gets set to zero since the SRL
6025   // 16 will clear the top bits.
6026   unsigned OpSizeInBits = VT.getSizeInBits();
6027   if (DemandHighBits && OpSizeInBits > 16) {
6028     // If the left-shift isn't masked out then the only way this is a bswap is
6029     // if all bits beyond the low 8 are 0. In that case the entire pattern
6030     // reduces to a left shift anyway: leave it for other parts of the combiner.
6031     if (!LookPassAnd0)
6032       return SDValue();
6033 
6034     // However, if the right shift isn't masked out then it might be because
6035     // it's not needed. See if we can spot that too.
6036     if (!LookPassAnd1 &&
6037         !DAG.MaskedValueIsZero(
6038             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
6039       return SDValue();
6040   }
6041 
6042   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
6043   if (OpSizeInBits > 16) {
6044     SDLoc DL(N);
6045     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
6046                       DAG.getConstant(OpSizeInBits - 16, DL,
6047                                       getShiftAmountTy(VT)));
6048   }
6049   return Res;
6050 }
6051 
6052 /// Return true if the specified node is an element that makes up a 32-bit
6053 /// packed halfword byteswap.
6054 /// ((x & 0x000000ff) << 8) |
6055 /// ((x & 0x0000ff00) >> 8) |
6056 /// ((x & 0x00ff0000) << 8) |
6057 /// ((x & 0xff000000) >> 8)
6058 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
6059   if (!N.getNode()->hasOneUse())
6060     return false;
6061 
6062   unsigned Opc = N.getOpcode();
6063   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
6064     return false;
6065 
6066   SDValue N0 = N.getOperand(0);
6067   unsigned Opc0 = N0.getOpcode();
6068   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
6069     return false;
6070 
6071   ConstantSDNode *N1C = nullptr;
6072   // SHL or SRL: look upstream for AND mask operand
6073   if (Opc == ISD::AND)
6074     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6075   else if (Opc0 == ISD::AND)
6076     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6077   if (!N1C)
6078     return false;
6079 
6080   unsigned MaskByteOffset;
6081   switch (N1C->getZExtValue()) {
6082   default:
6083     return false;
6084   case 0xFF:       MaskByteOffset = 0; break;
6085   case 0xFF00:     MaskByteOffset = 1; break;
6086   case 0xFFFF:
6087     // In case demanded bits didn't clear the bits that will be shifted out.
6088     // This is needed for X86.
6089     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
6090       MaskByteOffset = 1;
6091       break;
6092     }
6093     return false;
6094   case 0xFF0000:   MaskByteOffset = 2; break;
6095   case 0xFF000000: MaskByteOffset = 3; break;
6096   }
6097 
6098   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
6099   if (Opc == ISD::AND) {
6100     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
6101       // (x >> 8) & 0xff
6102       // (x >> 8) & 0xff0000
6103       if (Opc0 != ISD::SRL)
6104         return false;
6105       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6106       if (!C || C->getZExtValue() != 8)
6107         return false;
6108     } else {
6109       // (x << 8) & 0xff00
6110       // (x << 8) & 0xff000000
6111       if (Opc0 != ISD::SHL)
6112         return false;
6113       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6114       if (!C || C->getZExtValue() != 8)
6115         return false;
6116     }
6117   } else if (Opc == ISD::SHL) {
6118     // (x & 0xff) << 8
6119     // (x & 0xff0000) << 8
6120     if (MaskByteOffset != 0 && MaskByteOffset != 2)
6121       return false;
6122     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6123     if (!C || C->getZExtValue() != 8)
6124       return false;
6125   } else { // Opc == ISD::SRL
6126     // (x & 0xff00) >> 8
6127     // (x & 0xff000000) >> 8
6128     if (MaskByteOffset != 1 && MaskByteOffset != 3)
6129       return false;
6130     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
6131     if (!C || C->getZExtValue() != 8)
6132       return false;
6133   }
6134 
6135   if (Parts[MaskByteOffset])
6136     return false;
6137 
6138   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
6139   return true;
6140 }
6141 
6142 // Match 2 elements of a packed halfword bswap.
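// A pair may also appear pre-combined as (srl (bswap x), 16), which supplies
// both halfword parts at once.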
6143 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
6144   if (N.getOpcode() == ISD::OR)
6145     return isBSwapHWordElement(N.getOperand(0), Parts) &&
6146            isBSwapHWordElement(N.getOperand(1), Parts);
6147 
6148   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
6149     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
6150     if (!C || C->getAPIntValue() != 16)
6151       return false;
6152     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6153     return true;
6154   }
6155 
6156   return false;
6157 }
6158 
6159 // Match this pattern:
//   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
6161 // And rewrite this to:
6162 //   (rotr (bswap A), 16)
6163 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6164                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6165                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6166   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6167          "MatchBSwapHWordOrAndAnd: expecting i32");
6168   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6169     return SDValue();
6170   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6171     return SDValue();
6172   // TODO: this is too restrictive; lifting this restriction requires more tests
6173   if (!N0->hasOneUse() || !N1->hasOneUse())
6174     return SDValue();
6175   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6176   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6177   if (!Mask0 || !Mask1)
6178     return SDValue();
6179   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6180       Mask1->getAPIntValue() != 0x00ff00ff)
6181     return SDValue();
6182   SDValue Shift0 = N0.getOperand(0);
6183   SDValue Shift1 = N1.getOperand(0);
6184   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6185     return SDValue();
6186   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6187   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6188   if (!ShiftAmt0 || !ShiftAmt1)
6189     return SDValue();
6190   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6191     return SDValue();
6192   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6193     return SDValue();
6194 
6195   SDLoc DL(N);
6196   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6197   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6198   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6199 }
6200 
6201 /// Match a 32-bit packed halfword bswap. That is
6202 /// ((x & 0x000000ff) << 8) |
6203 /// ((x & 0x0000ff00) >> 8) |
6204 /// ((x & 0x00ff0000) << 8) |
6205 /// ((x & 0xff000000) >> 8)
6206 /// => (rotl (bswap x), 16)
6207 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6208   if (!LegalOperations)
6209     return SDValue();
6210 
6211   EVT VT = N->getValueType(0);
6212   if (VT != MVT::i32)
6213     return SDValue();
6214   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6215     return SDValue();
6216 
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

  // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

6227   // Look for either
6228   // (or (bswaphpair), (bswaphpair))
6229   // (or (or (bswaphpair), (and)), (and))
6230   // (or (or (and), (bswaphpair)), (and))
6231   SDNode *Parts[4] = {};
6232 
6233   if (isBSwapHWordPair(N0, Parts)) {
6234     // (or (or (and), (and)), (or (and), (and)))
6235     if (!isBSwapHWordPair(N1, Parts))
6236       return SDValue();
6237   } else if (N0.getOpcode() == ISD::OR) {
6238     // (or (or (or (and), (and)), (and)), (and))
6239     if (!isBSwapHWordElement(N1, Parts))
6240       return SDValue();
6241     SDValue N00 = N0.getOperand(0);
6242     SDValue N01 = N0.getOperand(1);
6243     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6244         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6245       return SDValue();
6246   } else
6247     return SDValue();
6248 
6249   // Make sure the parts are all coming from the same node.
6250   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6251     return SDValue();
6252 
6253   SDLoc DL(N);
6254   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6255                               SDValue(Parts[0], 0));
6256 
6257   // Result of the bswap should be rotated by 16. If it's not legal, then
6258   // do  (x << 16) | (x >> 16).
6259   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6260   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6261     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6262   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6263     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6264   return DAG.getNode(ISD::OR, DL, VT,
6265                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6266                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6267 }
6268 
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value; \see visitANDLike().
6271 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6272   EVT VT = N1.getValueType();
6273   SDLoc DL(N);
6274 
6275   // fold (or x, undef) -> -1
6276   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6277     return DAG.getAllOnesConstant(DL, VT);
6278 
6279   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6280     return V;
6281 
6282   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
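  // Illustrative example: with C1 == 0xF0 and C2 == 0x0F, if the low nibble
  // of X and the high nibble of Y are known zero, this simplifies to
  // (and (or X, Y), 0xFF).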
6283   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6284       // Don't increase # computations.
6285       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6286     // We can only do this xform if we know that bits from X that are set in C2
6287     // but not in C1 are already zero.  Likewise for Y.
6288     if (const ConstantSDNode *N0O1C =
6289         getAsNonOpaqueConstant(N0.getOperand(1))) {
6290       if (const ConstantSDNode *N1O1C =
6291           getAsNonOpaqueConstant(N1.getOperand(1))) {
6292         // We can only do this xform if we know that bits from X that are set in
6293         // C2 but not in C1 are already zero.  Likewise for Y.
6294         const APInt &LHSMask = N0O1C->getAPIntValue();
6295         const APInt &RHSMask = N1O1C->getAPIntValue();
6296 
6297         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6298             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6299           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6300                                   N0.getOperand(0), N1.getOperand(0));
6301           return DAG.getNode(ISD::AND, DL, VT, X,
6302                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6303         }
6304       }
6305     }
6306   }
6307 
6308   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6309   if (N0.getOpcode() == ISD::AND &&
6310       N1.getOpcode() == ISD::AND &&
6311       N0.getOperand(0) == N1.getOperand(0) &&
6312       // Don't increase # computations.
6313       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6314     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6315                             N0.getOperand(1), N1.getOperand(1));
6316     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6317   }
6318 
6319   return SDValue();
6320 }
6321 
6322 /// OR combines for which the commuted variant will be tried as well.
6323 static SDValue visitORCommutative(
6324     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6325   EVT VT = N0.getValueType();
6326   if (N0.getOpcode() == ISD::AND) {
6327     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6328     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6329       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6330 
6331     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6332     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6333       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6334   }
6335 
6336   return SDValue();
6337 }
6338 
6339 SDValue DAGCombiner::visitOR(SDNode *N) {
6340   SDValue N0 = N->getOperand(0);
6341   SDValue N1 = N->getOperand(1);
6342   EVT VT = N1.getValueType();
6343 
6344   // x | x --> x
6345   if (N0 == N1)
6346     return N0;
6347 
6348   // fold vector ops
6349   if (VT.isVector()) {
6350     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6351       return FoldedVOp;
6352 
6353     // fold (or x, 0) -> x, vector edition
6354     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
6355       return N1;
6356     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6357       return N0;
6358 
6359     // fold (or x, -1) -> -1, vector edition
6360     if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
      // do not return N0, because an undef node may exist in N0
6362       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6363     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      // do not return N1, because an undef node may exist in N1
6365       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6366 
6367     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6368     // Do this only if the resulting shuffle is legal.
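    // Illustrative example (v4i32):
    //   (or (shuf A, V_0, <0,4,2,4>), (shuf B, V_0, <4,1,4,3>))
    //     --> (shuf A, B, <0,5,2,7>)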
6369     if (isa<ShuffleVectorSDNode>(N0) &&
6370         isa<ShuffleVectorSDNode>(N1) &&
6371         // Avoid folding a node with illegal type.
6372         TLI.isTypeLegal(VT)) {
6373       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6374       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6375       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6376       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6377       // Ensure both shuffles have a zero input.
6378       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6379         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6380         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6381         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6382         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6383         bool CanFold = true;
6384         int NumElts = VT.getVectorNumElements();
6385         SmallVector<int, 4> Mask(NumElts);
6386 
6387         for (int i = 0; i != NumElts; ++i) {
6388           int M0 = SV0->getMaskElt(i);
6389           int M1 = SV1->getMaskElt(i);
6390 
6391           // Determine if either index is pointing to a zero vector.
6392           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6393           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6394 
          // If one element is zero and the other side is undef, keep undef.
6396           // This also handles the case that both are undef.
6397           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6398             Mask[i] = -1;
6399             continue;
6400           }
6401 
6402           // Make sure only one of the elements is zero.
6403           if (M0Zero == M1Zero) {
6404             CanFold = false;
6405             break;
6406           }
6407 
6408           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6409 
6410           // We have a zero and non-zero element. If the non-zero came from
6411           // SV0 make the index a LHS index. If it came from SV1, make it
6412           // a RHS index. We need to mod by NumElts because we don't care
6413           // which operand it came from in the original shuffles.
6414           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6415         }
6416 
6417         if (CanFold) {
6418           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6419           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6420 
6421           SDValue LegalShuffle =
6422               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6423                                           Mask, DAG);
6424           if (LegalShuffle)
6425             return LegalShuffle;
6426         }
6427       }
6428     }
6429   }
6430 
6431   // fold (or c1, c2) -> c1|c2
6432   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6433   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6434     return C;
6435 
6436   // canonicalize constant to RHS
6437   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6438      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6439     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6440 
6441   // fold (or x, 0) -> x
6442   if (isNullConstant(N1))
6443     return N0;
6444 
6445   // fold (or x, -1) -> -1
6446   if (isAllOnesConstant(N1))
6447     return N1;
6448 
6449   if (SDValue NewSel = foldBinOpIntoSelect(N))
6450     return NewSel;
6451 
6452   // fold (or x, c) -> c iff (x & ~c) == 0
6453   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6454     return N1;
6455 
6456   if (SDValue Combined = visitORLike(N0, N1, N))
6457     return Combined;
6458 
6459   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6460     return Combined;
6461 
6462   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6463   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6464     return BSwap;
6465   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6466     return BSwap;
6467 
6468   // reassociate or
6469   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6470     return ROR;
6471 
6472   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6473   // iff (c1 & c2) != 0 or c1/c2 are undef.
6474   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6475     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6476   };
6477   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6478       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6479     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6480                                                  {N1, N0.getOperand(1)})) {
6481       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6482       AddToWorklist(IOR.getNode());
6483       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6484     }
6485   }
6486 
6487   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6488     return Combined;
6489   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6490     return Combined;
6491 
6492   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6493   if (N0.getOpcode() == N1.getOpcode())
6494     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6495       return V;
6496 
6497   // See if this is some rotate idiom.
6498   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6499     return Rot;
6500 
6501   if (SDValue Load = MatchLoadCombine(N))
6502     return Load;
6503 
6504   // Simplify the operands using demanded-bits information.
6505   if (SimplifyDemandedBits(SDValue(N, 0)))
6506     return SDValue(N, 0);
6507 
6508   // If OR can be rewritten into ADD, try combines based on ADD.
6509   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6510       DAG.haveNoCommonBitsSet(N0, N1))
6511     if (SDValue Combined = visitADDLike(N))
6512       return Combined;
6513 
6514   return SDValue();
6515 }
6516 
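/// If \p Op is (and X, C) where C is a constant or a constant build_vector,
/// return X and report C via \p Mask; otherwise return \p Op unchanged.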
6517 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6518   if (Op.getOpcode() == ISD::AND &&
6519       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6520     Mask = Op.getOperand(1);
6521     return Op.getOperand(0);
6522   }
6523   return Op;
6524 }
6525 
6526 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6527 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6528                             SDValue &Mask) {
6529   Op = stripConstantMask(DAG, Op, Mask);
6530   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6531     Shift = Op;
6532     return true;
6533   }
6534   return false;
6535 }
6536 
6537 /// Helper function for visitOR to extract the needed side of a rotate idiom
6538 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6539 /// InstCombine merged some outside op with one of the shifts from
6540 /// the rotate pattern.
6541 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6542 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6543 /// patterns:
6544 ///
///   (or (add v v) (srl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (srl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (srl (udiv v c1) c3)
///
///   (or (shl v c0) (srl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (srl v c0) (shl (srl v c1) c2)):
///     expands (srl v c0) -> (srl (srl v c1) c3)
6559 ///
6560 /// Such that in all cases, c3+c2==bitwidth(op v c1).
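///
/// Illustrative example (i32): in (or (mul v 16) (srl (mul v 2) 29)),
/// c0 == 16, c1 == 2 and c2 == 29, so c3 == 3 and c0 == c1 << c3;
/// (mul v 16) expands to (shl (mul v 2) 3), exposing a rotate by 3.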
6561 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6562                                      SDValue ExtractFrom, SDValue &Mask,
6563                                      const SDLoc &DL) {
6564   assert(OppShift && ExtractFrom && "Empty SDValue");
6565   assert(
6566       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6567       "Existing shift must be valid as a rotate half");
6568 
6569   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6570 
6571   // Value and Type of the shift.
6572   SDValue OppShiftLHS = OppShift.getOperand(0);
6573   EVT ShiftedVT = OppShiftLHS.getValueType();
6574 
6575   // Amount of the existing shift.
6576   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6577 
6578   // (add v v) -> (shl v 1)
6579   // TODO: Should this be a general DAG canonicalization?
6580   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6581       ExtractFrom.getOpcode() == ISD::ADD &&
6582       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6583       ExtractFrom.getOperand(0) == OppShiftLHS &&
6584       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6585     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6586                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6587 
6588   // Preconditions:
6589   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6590   //
6591   // Find opcode of the needed shift to be extracted from (op0 v c0).
6592   unsigned Opcode = ISD::DELETED_NODE;
6593   bool IsMulOrDiv = false;
6594   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6595   // opcode or its arithmetic (mul or udiv) variant.
6596   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6597     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6598     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6599       return false;
6600     Opcode = NeededShift;
6601     return true;
6602   };
6603   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6604   // that the needed shift can be extracted from.
6605   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6606       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6607     return SDValue();
6608 
6609   // op0 must be the same opcode on both sides, have the same LHS argument,
6610   // and produce the same value type.
6611   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6612       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6613       ShiftedVT != ExtractFrom.getValueType())
6614     return SDValue();
6615 
6616   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6617   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6618   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6619   ConstantSDNode *ExtractFromCst =
6620       isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
6622   // Check that we have constant values.
6623   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6624       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6625       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6626     return SDValue();
6627 
6628   // Compute the shift amount we need to extract to complete the rotate.
6629   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6630   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6631     return SDValue();
6632   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6633   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6634   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6635   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6636   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6637 
  // Now try to extract the needed shift from the ExtractFrom op and see if the
  // result matches up with the existing shift's LHS op.
6640   if (IsMulOrDiv) {
6641     // Op to extract from is a mul or udiv by a constant.
6642     // Check:
6643     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6644     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6645     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6646                                                  NeededShiftAmt.getZExtValue());
6647     APInt ResultAmt;
6648     APInt Rem;
6649     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6650     if (Rem != 0 || ResultAmt != OppLHSAmt)
6651       return SDValue();
6652   } else {
6653     // Op to extract from is a shift by a constant.
6654     // Check:
6655     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6656     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6657                                           ExtractFromAmt.getBitWidth()))
6658       return SDValue();
6659   }
6660 
6661   // Return the expanded shift op that should allow a rotate to be formed.
6662   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6663   EVT ResVT = ExtractFrom.getValueType();
6664   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6665   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6666 }
6667 
6668 // Return true if we can prove that, whenever Neg and Pos are both in the
6669 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6670 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6671 //
6672 //     (or (shift1 X, Neg), (shift2 X, Pos))
6673 //
6674 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6675 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6676 // to consider shift amounts with defined behavior.
6677 //
6678 // The IsRotate flag should be set when the LHS of both shifts is the same.
6679 // Otherwise if matching a general funnel shift, it should be clear.
6680 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6681                            SelectionDAG &DAG, bool IsRotate) {
6682   // If EltSize is a power of 2 then:
6683   //
6684   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6685   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6686   //
6687   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6688   // for the stronger condition:
6689   //
6690   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6691   //
6692   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6693   // we can just replace Neg with Neg' for the rest of the function.
6694   //
6695   // In other cases we check for the even stronger condition:
6696   //
6697   //     Neg == EltSize - Pos                                    [B]
6698   //
6699   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6700   // behavior if Pos == 0 (and consequently Neg == EltSize).
6701   //
6702   // We could actually use [A] whenever EltSize is a power of 2, but the
6703   // only extra cases that it would match are those uninteresting ones
6704   // where Neg and Pos are never in range at the same time.  E.g. for
6705   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6706   // as well as (sub 32, Pos), but:
6707   //
6708   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6709   //
6710   // always invokes undefined behavior for 32-bit X.
6711   //
6712   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6713   //
6714   // NOTE: We can only do this when matching an AND and not a general
6715   // funnel shift.
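  //
  // For example, with EltSize == 32: a Neg of (and (sub 32, Pos), 31) has the
  // AND stripped below, and the remaining (sub 32, Pos) passes the final
  // check because 32 & 31 == 0.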
6716   unsigned MaskLoBits = 0;
6717   if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6718     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6719       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6720       unsigned Bits = Log2_64(EltSize);
6721       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6722           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6723         Neg = Neg.getOperand(0);
6724         MaskLoBits = Bits;
6725       }
6726     }
6727   }
6728 
6729   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6730   if (Neg.getOpcode() != ISD::SUB)
6731     return false;
6732   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6733   if (!NegC)
6734     return false;
6735   SDValue NegOp1 = Neg.getOperand(1);
6736 
6737   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6738   // Pos'.  The truncation is redundant for the purpose of the equality.
6739   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6740     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6741       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6742       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6743           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6744            MaskLoBits))
6745         Pos = Pos.getOperand(0);
6746     }
6747   }
6748 
6749   // The condition we need is now:
6750   //
6751   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6752   //
6753   // If NegOp1 == Pos then we need:
6754   //
6755   //              EltSize & Mask == NegC & Mask
6756   //
6757   // (because "x & Mask" is a truncation and distributes through subtraction).
6758   //
6759   // We also need to account for a potential truncation of NegOp1 if the amount
6760   // has already been legalized to a shift amount type.
6761   APInt Width;
6762   if ((Pos == NegOp1) ||
6763       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6764     Width = NegC->getAPIntValue();
6765 
6766   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6767   // Then the condition we want to prove becomes:
6768   //
6769   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6770   //
6771   // which, again because "x & Mask" is a truncation, becomes:
6772   //
6773   //                NegC & Mask == (EltSize - PosC) & Mask
6774   //             EltSize & Mask == (NegC + PosC) & Mask
6775   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6776     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6777       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6778     else
6779       return false;
6780   } else
6781     return false;
6782 
6783   // Now we just need to check that EltSize & Mask == Width & Mask.
6784   if (MaskLoBits)
6785     // EltSize & Mask is 0 since Mask is EltSize - 1.
6786     return Width.getLoBits(MaskLoBits) == 0;
6787   return Width == EltSize;
6788 }
6789 
6790 // A subroutine of MatchRotate used once we have found an OR of two opposite
6791 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6792 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6793 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6794 // Neg with outer conversions stripped away.
6795 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6796                                        SDValue Neg, SDValue InnerPos,
6797                                        SDValue InnerNeg, unsigned PosOpcode,
6798                                        unsigned NegOpcode, const SDLoc &DL) {
6799   // fold (or (shl x, (*ext y)),
6800   //          (srl x, (*ext (sub 32, y)))) ->
6801   //   (rotl x, y) or (rotr x, (sub 32, y))
6802   //
6803   // fold (or (shl x, (*ext (sub 32, y))),
6804   //          (srl x, (*ext y))) ->
6805   //   (rotr x, y) or (rotl x, (sub 32, y))
6806   EVT VT = Shifted.getValueType();
6807   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6808                      /*IsRotate*/ true)) {
6809     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6810     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6811                        HasPos ? Pos : Neg);
6812   }
6813 
6814   return SDValue();
6815 }
6816 
6817 // A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 and N1.  If Neg == <operand size> - Pos then the OR reduces
6819 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
6820 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6821 // Neg with outer conversions stripped away.
6822 // TODO: Merge with MatchRotatePosNeg.
6823 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6824                                        SDValue Neg, SDValue InnerPos,
6825                                        SDValue InnerNeg, unsigned PosOpcode,
6826                                        unsigned NegOpcode, const SDLoc &DL) {
6827   EVT VT = N0.getValueType();
6828   unsigned EltBits = VT.getScalarSizeInBits();
6829 
6830   // fold (or (shl x0, (*ext y)),
6831   //          (srl x1, (*ext (sub 32, y)))) ->
6832   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6833   //
6834   // fold (or (shl x0, (*ext (sub 32, y))),
6835   //          (srl x1, (*ext y))) ->
6836   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6837   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
6838     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6839     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6840                        HasPos ? Pos : Neg);
6841   }
6842 
6843   // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
6845   // TODO: When can we use the NegOpcode case?
6846   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6847     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6848       if (Op.getOpcode() != BinOpc)
6849         return false;
6850       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6851       return Cst && (Cst->getAPIntValue() == Imm);
6852     };
6853 
6854     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6855     //   -> (fshl x0, x1, y)
6856     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6857         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6858         InnerPos == InnerNeg.getOperand(0) &&
6859         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6860       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6861     }
6862 
6863     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6864     //   -> (fshr x0, x1, y)
6865     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6866         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6867         InnerNeg == InnerPos.getOperand(0) &&
6868         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6869       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6870     }
6871 
6872     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6873     //   -> (fshr x0, x1, y)
6874     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6875     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6876         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6877         InnerNeg == InnerPos.getOperand(0) &&
6878         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6879       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6880     }
6881   }
6882 
6883   return SDValue();
6884 }
6885 
6886 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6887 // idioms for rotate, and if the target supports rotation instructions, generate
6888 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6889 // with different shifted sources.
6890 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded or promoted types won't work with rotates.
6892   EVT VT = LHS.getValueType();
6893   if (!TLI.isTypeLegal(VT))
6894     return SDValue();
6895 
6896   // The target must have at least one rotate/funnel flavor.
6897   bool HasROTL = hasOperation(ISD::ROTL, VT);
6898   bool HasROTR = hasOperation(ISD::ROTR, VT);
6899   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6900   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6901   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6902     return SDValue();
6903 
6904   // Check for truncated rotate.
6905   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6906       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType() &&
           "Truncated operands must have identical types");
6908     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6909       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6910     }
6911   }
6912 
6913   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6914   SDValue LHSShift;   // The shift.
6915   SDValue LHSMask;    // AND value if any.
6916   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6917 
6918   SDValue RHSShift;   // The shift.
6919   SDValue RHSMask;    // AND value if any.
6920   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6921 
6922   // If neither side matched a rotate half, bail
6923   if (!LHSShift && !RHSShift)
6924     return SDValue();
6925 
6926   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6927   // side of the rotate, so try to handle that here. In all cases we need to
6928   // pass the matched shift from the opposite side to compute the opcode and
6929   // needed shift amount to extract.  We still want to do this if both sides
6930   // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
6932   // single one).
6933 
6934   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6935   if (LHSShift)
6936     if (SDValue NewRHSShift =
6937             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6938       RHSShift = NewRHSShift;
6939   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6940   if (RHSShift)
6941     if (SDValue NewLHSShift =
6942             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6943       LHSShift = NewLHSShift;
6944 
  // If a side is still missing, there is nothing else we can do.
6946   if (!RHSShift || !LHSShift)
6947     return SDValue();
6948 
6949   // At this point we've matched or extracted a shift op on each side.
6950 
6951   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6952     return SDValue(); // Shifts must disagree.
6953 
6954   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6955   if (!IsRotate && !(HasFSHL || HasFSHR))
6956     return SDValue(); // Requires funnel shift support.
6957 
6958   // Canonicalize shl to left side in a shl/srl pair.
6959   if (RHSShift.getOpcode() == ISD::SHL) {
6960     std::swap(LHS, RHS);
6961     std::swap(LHSShift, RHSShift);
6962     std::swap(LHSMask, RHSMask);
6963   }
6964 
6965   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6966   SDValue LHSShiftArg = LHSShift.getOperand(0);
6967   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6968   SDValue RHSShiftArg = RHSShift.getOperand(0);
6969   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6970 
6971   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6972   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6973   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6974   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6975   // iff C1+C2 == EltSizeInBits
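  //
  // For example, with i32 (EltSizeInBits == 32):
  //   (or (shl x, 8), (srl x, 24)) --> (rotl x, 8) or (rotr x, 24)
  //   (or (shl x, 8), (srl y, 24)) --> (fshl x, y, 8) or (fshr x, y, 24)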
6976   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6977                                         ConstantSDNode *RHS) {
6978     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6979   };
6980   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6981     SDValue Res;
6982     if (IsRotate && (HasROTL || HasROTR))
6983       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6984                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
6985     else
6986       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6987                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6988 
6989     // If there is an AND of either shifted operand, apply it to the result.
6990     if (LHSMask.getNode() || RHSMask.getNode()) {
6991       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6992       SDValue Mask = AllOnes;
6993 
6994       if (LHSMask.getNode()) {
6995         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6996         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6997                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6998       }
6999       if (RHSMask.getNode()) {
7000         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
7001         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
7002                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
7003       }
7004 
7005       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
7006     }
7007 
7008     return Res;
7009   }
7010 
  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the correct bits.
7013   if (LHSMask.getNode() || RHSMask.getNode())
7014     return SDValue();
7015 
7016   // If the shift amount is sign/zext/any-extended just peel it off.
7017   SDValue LExtOp0 = LHSShiftAmt;
7018   SDValue RExtOp0 = RHSShiftAmt;
7019   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7020        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7021        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7022        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
7023       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
7024        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
7025        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
7026        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
7027     LExtOp0 = LHSShiftAmt.getOperand(0);
7028     RExtOp0 = RHSShiftAmt.getOperand(0);
7029   }
7030 
7031   if (IsRotate && (HasROTL || HasROTR)) {
7032     SDValue TryL =
7033         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
7034                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
7035     if (TryL)
7036       return TryL;
7037 
7038     SDValue TryR =
7039         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
7040                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
7041     if (TryR)
7042       return TryR;
7043   }
7044 
7045   SDValue TryL =
7046       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
7047                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
7048   if (TryL)
7049     return TryL;
7050 
7051   SDValue TryR =
7052       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
7053                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
7054   if (TryR)
7055     return TryR;
7056 
7057   return SDValue();
7058 }
7059 
7060 namespace {
7061 
/// Represents the known origin of an individual byte in a load combine
/// pattern. The value of the byte is either constant zero or comes from
/// memory.
7064 struct ByteProvider {
7065   // For constant zero providers Load is set to nullptr. For memory providers
7066   // Load represents the node which loads the byte from memory.
7067   // ByteOffset is the offset of the byte in the value produced by the load.
7068   LoadSDNode *Load = nullptr;
7069   unsigned ByteOffset = 0;
7070 
7071   ByteProvider() = default;
7072 
7073   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
7074     return ByteProvider(Load, ByteOffset);
7075   }
7076 
7077   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
7078 
7079   bool isConstantZero() const { return !Load; }
7080   bool isMemory() const { return Load; }
7081 
7082   bool operator==(const ByteProvider &Other) const {
7083     return Other.Load == Load && Other.ByteOffset == ByteOffset;
7084   }
7085 
7086 private:
7087   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
7088       : Load(Load), ByteOffset(ByteOffset) {}
7089 };
7090 
7091 } // end anonymous namespace
7092 
/// Recursively traverses the expression, calculating the origin of the
/// requested byte of the given value. Returns None if the provider can't be
/// calculated.
///
/// For all values except the root of the expression, this verifies that the
/// value has exactly one use; if that does not hold, it returns None. This
/// way, if the origin of the byte is returned, it is guaranteed that the
/// values which contribute to the byte are not used outside of this
/// expression.
7100 ///
7101 /// Because the parts of the expression are not allowed to have more than one
7102 /// use this function iterates over trees, not DAGs. So it never visits the same
7103 /// node more than once.
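///
/// For example (an illustrative sketch), given an i16 value
///     (or (zero_extend (load i8 p0)) (shl (zero_extend (load i8 p1)) 8))
/// byte 0 is provided by (load i8 p0) and byte 1 by (load i8 p1), each at
/// byte offset 0 within its load.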
7104 static const Optional<ByteProvider>
7105 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
7106                       bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
7108   if (Depth == 10)
7109     return None;
7110 
7111   if (!Root && !Op.hasOneUse())
7112     return None;
7113 
7114   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
7115   unsigned BitWidth = Op.getValueSizeInBits();
7116   if (BitWidth % 8 != 0)
7117     return None;
7118   unsigned ByteWidth = BitWidth / 8;
7119   assert(Index < ByteWidth && "invalid index requested");
7120   (void) ByteWidth;
7121 
7122   switch (Op.getOpcode()) {
7123   case ISD::OR: {
7124     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
7125     if (!LHS)
7126       return None;
7127     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
7128     if (!RHS)
7129       return None;
7130 
7131     if (LHS->isConstantZero())
7132       return RHS;
7133     if (RHS->isConstantZero())
7134       return LHS;
7135     return None;
7136   }
7137   case ISD::SHL: {
7138     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
7139     if (!ShiftOp)
7140       return None;
7141 
7142     uint64_t BitShift = ShiftOp->getZExtValue();
7143     if (BitShift % 8 != 0)
7144       return None;
7145     uint64_t ByteShift = BitShift / 8;
7146 
7147     return Index < ByteShift
7148                ? ByteProvider::getConstantZero()
7149                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
7150                                        Depth + 1);
7151   }
7152   case ISD::ANY_EXTEND:
7153   case ISD::SIGN_EXTEND:
7154   case ISD::ZERO_EXTEND: {
7155     SDValue NarrowOp = Op->getOperand(0);
7156     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
7157     if (NarrowBitWidth % 8 != 0)
7158       return None;
7159     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7160 
7161     if (Index >= NarrowByteWidth)
7162       return Op.getOpcode() == ISD::ZERO_EXTEND
7163                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7164                  : None;
7165     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7166   }
7167   case ISD::BSWAP:
7168     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7169                                  Depth + 1);
7170   case ISD::LOAD: {
7171     auto L = cast<LoadSDNode>(Op.getNode());
7172     if (!L->isSimple() || L->isIndexed())
7173       return None;
7174 
7175     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7176     if (NarrowBitWidth % 8 != 0)
7177       return None;
7178     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7179 
7180     if (Index >= NarrowByteWidth)
7181       return L->getExtensionType() == ISD::ZEXTLOAD
7182                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7183                  : None;
7184     return ByteProvider::getMemory(L, Index);
7185   }
7186   }
7187 
7188   return None;
7189 }
7190 
7191 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
7192   return i;
7193 }
7194 
7195 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
7196   return BW - i - 1;
7197 }
7198 
// Check if the byte offsets we are looking at match either a big or little
// endian value load. Return true for big endian, false for little endian, and
// None if the match failed.
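//
// For example, with FirstOffset == 0:
//   ByteOffsets {0, 1, 2, 3} matches a little endian load (returns false),
//   ByteOffsets {3, 2, 1, 0} matches a big endian load (returns true), and
//   ByteOffsets {1, 0, 3, 2} matches neither (returns None).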
7202 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7203                                   int64_t FirstOffset) {
  // Endianness can only be determined when there are at least 2 bytes.
7205   unsigned Width = ByteOffsets.size();
7206   if (Width < 2)
7207     return None;
7208 
7209   bool BigEndian = true, LittleEndian = true;
7210   for (unsigned i = 0; i < Width; i++) {
7211     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7212     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7213     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7214     if (!BigEndian && !LittleEndian)
7215       return None;
7216   }
7217 
  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
7220   return BigEndian;
7221 }
7222 
7223 static SDValue stripTruncAndExt(SDValue Value) {
7224   switch (Value.getOpcode()) {
7225   case ISD::TRUNCATE:
7226   case ISD::ZERO_EXTEND:
7227   case ISD::SIGN_EXTEND:
7228   case ISD::ANY_EXTEND:
7229     return stripTruncAndExt(Value.getOperand(0));
7230   }
7231   return Value;
7232 }
7233 
7234 /// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
7237 ///
7238 /// Assuming little endian target:
7239 ///  i8 *p = ...
7240 ///  i32 val = ...
7241 ///  p[0] = (val >> 0) & 0xFF;
7242 ///  p[1] = (val >> 8) & 0xFF;
7243 ///  p[2] = (val >> 16) & 0xFF;
7244 ///  p[3] = (val >> 24) & 0xFF;
7245 /// =>
7246 ///  *((i32)p) = val;
7247 ///
7248 ///  i8 *p = ...
7249 ///  i32 val = ...
7250 ///  p[0] = (val >> 24) & 0xFF;
7251 ///  p[1] = (val >> 16) & 0xFF;
7252 ///  p[2] = (val >> 8) & 0xFF;
7253 ///  p[3] = (val >> 0) & 0xFF;
7254 /// =>
7255 ///  *((i32)p) = BSWAP(val);
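///
/// A two-store variant with wider (e.g. i16) pieces stored in reversed order
/// is folded to a rotate instead of a BSWAP (again assuming a little endian
/// target):
///  i16 *p = ...
///  i32 val = ...
///  p[0] = (val >> 16) & 0xFFFF;
///  p[1] = (val >> 0) & 0xFFFF;
/// =>
///  *((i32)p) = ROTR(val, 16);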
7256 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7257   // The matching looks for "store (trunc x)" patterns that appear early but are
7258   // likely to be replaced by truncating store nodes during combining.
7259   // TODO: If there is evidence that running this later would help, this
7260   //       limitation could be removed. Legality checks may need to be added
7261   //       for the created store and optional bswap/rotate.
7262   if (LegalOperations)
7263     return SDValue();
7264 
7265   // Collect all the stores in the chain.
7266   SDValue Chain;
7267   SmallVector<StoreSDNode *, 8> Stores;
7268   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
7269     // TODO: Allow unordered atomics when wider type is legal (see D66309)
7270     EVT MemVT = Store->getMemoryVT();
7271     if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7272         !Store->isSimple() || Store->isIndexed())
7273       return SDValue();
7274     Stores.push_back(Store);
7275     Chain = Store->getChain();
7276   }
7277   // There is no reason to continue if we do not have at least a pair of stores.
7278   if (Stores.size() < 2)
7279     return SDValue();
7280 
7281   // Handle simple types only.
7282   LLVMContext &Context = *DAG.getContext();
7283   unsigned NumStores = Stores.size();
7284   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7285   unsigned WideNumBits = NumStores * NarrowNumBits;
7286   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7287   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7288     return SDValue();
7289 
7290   // Check if all bytes of the source value that we are looking at are stored
7291   // to the same base address. Collect offsets from Base address into OffsetMap.
7292   SDValue SourceValue;
7293   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7294   int64_t FirstOffset = INT64_MAX;
7295   StoreSDNode *FirstStore = nullptr;
7296   Optional<BaseIndexOffset> Base;
7297   for (auto Store : Stores) {
    // All the stores store different parts of the combined source value. A
    // truncate is required to get the partial value.
7300     SDValue Trunc = Store->getValue();
7301     if (Trunc.getOpcode() != ISD::TRUNCATE)
7302       return SDValue();
    // All parts other than the one at offset 0 in the wide value require a
    // shift operation to recover the offset.
7305     int64_t Offset = 0;
7306     SDValue WideVal = Trunc.getOperand(0);
7307     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7308         isa<ConstantSDNode>(WideVal.getOperand(1))) {
      // The shift amount must be a constant multiple of the narrow type.
      // It translates to the offset, in narrow-type units, within the wide
      // source value "y".
7311       //
7312       // x = srl y, ShiftAmtC
7313       // i8 z = trunc x
7314       // store z, ...
7315       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7316       if (ShiftAmtC % NarrowNumBits != 0)
7317         return SDValue();
7318 
7319       Offset = ShiftAmtC / NarrowNumBits;
7320       WideVal = WideVal.getOperand(0);
7321     }
7322 
7323     // Stores must share the same source value with different offsets.
    // Truncates and extends should be stripped to get the single source value.
7325     if (!SourceValue)
7326       SourceValue = WideVal;
7327     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7328       return SDValue();
7329     else if (SourceValue.getValueType() != WideVT) {
7330       if (WideVal.getValueType() == WideVT ||
7331           WideVal.getScalarValueSizeInBits() >
7332               SourceValue.getScalarValueSizeInBits())
7333         SourceValue = WideVal;
7334       // Give up if the source value type is smaller than the store size.
7335       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7336         return SDValue();
7337     }
7338 
7339     // Stores must share the same base address.
7340     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7341     int64_t ByteOffsetFromBase = 0;
7342     if (!Base)
7343       Base = Ptr;
7344     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7345       return SDValue();
7346 
7347     // Remember the first store.
7348     if (ByteOffsetFromBase < FirstOffset) {
7349       FirstStore = Store;
7350       FirstOffset = ByteOffsetFromBase;
7351     }
    // Map the offset in the store to the offset in the combined value, and
    // return early if this offset has already been set.
7354     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7355       return SDValue();
7356     OffsetMap[Offset] = ByteOffsetFromBase;
7357   }
7358 
7359   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7360   assert(FirstStore && "First store must be set");
7361 
7362   // Check that a store of the wide type is both allowed and fast on the target
7363   const DataLayout &Layout = DAG.getDataLayout();
7364   bool Fast = false;
7365   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7366                                         *FirstStore->getMemOperand(), &Fast);
7367   if (!Allowed || !Fast)
7368     return SDValue();
7369 
7370   // Check if the pieces of the value are going to the expected places in memory
7371   // to merge the stores.
7372   auto checkOffsets = [&](bool MatchLittleEndian) {
7373     if (MatchLittleEndian) {
7374       for (unsigned i = 0; i != NumStores; ++i)
7375         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7376           return false;
7377     } else { // MatchBigEndian by reversing loop counter.
7378       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7379         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7380           return false;
7381     }
7382     return true;
7383   };
7384 
7385   // Check if the offsets line up for the native data layout of this target.
7386   bool NeedBswap = false;
7387   bool NeedRotate = false;
7388   if (!checkOffsets(Layout.isLittleEndian())) {
7389     // Special-case: check if byte offsets line up for the opposite endian.
7390     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7391       NeedBswap = true;
7392     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7393       NeedRotate = true;
7394     else
7395       return SDValue();
7396   }
7397 
7398   SDLoc DL(N);
7399   if (WideVT != SourceValue.getValueType()) {
7400     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7401            "Unexpected store value to merge");
7402     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7403   }
7404 
  // Before legalize we can introduce illegal bswaps/rotates which will later
  // be converted to an explicit bswap sequence. This way we end up with a
  // single store and byte shuffling instead of several stores and byte
  // shuffling.
7408   if (NeedBswap) {
7409     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7410   } else if (NeedRotate) {
7411     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7412     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7413     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7414   }
7415 
7416   SDValue NewStore =
7417       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7418                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7419 
7420   // Rely on other DAG combine rules to remove the other individual stores.
7421   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7422   return NewStore;
7423 }
7424 
7425 /// Match a pattern where a wide type scalar value is loaded by several narrow
7426 /// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
7428 ///
7429 /// Assuming little endian target:
7430 ///  i8 *a = ...
7431 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7432 /// =>
7433 ///  i32 val = *((i32)a)
7434 ///
7435 ///  i8 *a = ...
7436 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7437 /// =>
7438 ///  i32 val = BSWAP(*((i32)a))
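///
/// A zero-extending variant (same little endian assumption), where the upper
/// bytes are known to be zero:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8)
/// =>
///  i32 val = zext (*((i16)a))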
7439 ///
7440 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7441 /// interact well with the worklist mechanism. When a part of the pattern is
7442 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7443 /// but the root node of the pattern which triggers the load combine is not
7444 /// necessarily a direct user of the changed node. For example, once the address
7445 /// of t28 load is reassociated load combine won't be triggered:
7446 ///             t25: i32 = add t4, Constant:i32<2>
7447 ///           t26: i64 = sign_extend t25
7448 ///        t27: i64 = add t2, t26
7449 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7450 ///     t29: i32 = zero_extend t28
7451 ///   t32: i32 = shl t29, Constant:i8<8>
7452 /// t33: i32 = or t23, t32
7453 /// As a possible fix visitLoad can check if the load can be a part of a load
7454 /// combine pattern and add corresponding OR roots to the worklist.
7455 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7456   assert(N->getOpcode() == ISD::OR &&
7457          "Can only match load combining against OR nodes");
7458 
  // Handle simple types only.
7460   EVT VT = N->getValueType(0);
7461   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7462     return SDValue();
7463   unsigned ByteWidth = VT.getSizeInBits() / 8;
7464 
7465   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7466   auto MemoryByteOffset = [&] (ByteProvider P) {
7467     assert(P.isMemory() && "Must be a memory byte provider");
7468     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
7471     unsigned LoadByteWidth = LoadBitWidth / 8;
7472     return IsBigEndianTarget
7473             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7474             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7475   };
7476 
7477   Optional<BaseIndexOffset> Base;
7478   SDValue Chain;
7479 
7480   SmallPtrSet<LoadSDNode *, 8> Loads;
7481   Optional<ByteProvider> FirstByteProvider;
7482   int64_t FirstOffset = INT64_MAX;
7483 
7484   // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect byte offsets from the base address in ByteOffsets.
7486   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7487   unsigned ZeroExtendedBytes = 0;
7488   for (int i = ByteWidth - 1; i >= 0; --i) {
7489     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7490     if (!P)
7491       return SDValue();
7492 
7493     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
7496       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7497         return SDValue();
7498       continue;
7499     }
7500     assert(P->isMemory() && "provenance should either be memory or zero");
7501 
7502     LoadSDNode *L = P->Load;
7503     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7504            !L->isIndexed() &&
7505            "Must be enforced by calculateByteProvider");
7506     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7507 
7508     // All loads must share the same chain
7509     SDValue LChain = L->getChain();
7510     if (!Chain)
7511       Chain = LChain;
7512     else if (Chain != LChain)
7513       return SDValue();
7514 
7515     // Loads must share the same base address
7516     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7517     int64_t ByteOffsetFromBase = 0;
7518     if (!Base)
7519       Base = Ptr;
7520     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7521       return SDValue();
7522 
7523     // Calculate the offset of the current byte from the base address
7524     ByteOffsetFromBase += MemoryByteOffset(*P);
7525     ByteOffsets[i] = ByteOffsetFromBase;
7526 
7527     // Remember the first byte load
7528     if (ByteOffsetFromBase < FirstOffset) {
7529       FirstByteProvider = P;
7530       FirstOffset = ByteOffsetFromBase;
7531     }
7532 
7533     Loads.insert(L);
7534   }
7535   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7536          "memory, so there must be at least one load which produces the value");
7537   assert(Base && "Base address of the accessed memory location must be set");
7538   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7539 
7540   bool NeedsZext = ZeroExtendedBytes > 0;
7541 
7542   EVT MemVT =
7543       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7544 
7545   if (!MemVT.isSimple())
7546     return SDValue();
7547 
  // Before legalize we can introduce wider-than-legal loads which will later
  // be split into legal-sized loads. This enables us to combine i64-by-i8 load
  // patterns into a couple of i32 loads on 32-bit targets.
7551   if (LegalOperations &&
7552       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7553                             MemVT))
7554     return SDValue();
7555 
  // Check if the bytes of the OR we are looking at match either a big or
  // little endian value load.
7558   Optional<bool> IsBigEndian = isBigEndian(
7559       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7560   if (!IsBigEndian.hasValue())
7561     return SDValue();
7562 
7563   assert(FirstByteProvider && "must be set");
7564 
  // Ensure that the first byte is loaded from the zero offset of the first
  // load, so that the combined value can be loaded from the first load's
  // address.
7567   if (MemoryByteOffset(*FirstByteProvider) != 0)
7568     return SDValue();
7569   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7570 
7571   // The node we are looking at matches with the pattern, check if we can
7572   // replace it with a single (possibly zero-extended) load and bswap + shift if
7573   // needed.
7574 
  // If the load needs a byte swap, check if the target supports it.
7576   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7577 
  // Before legalize we can introduce illegal bswaps which will later be
  // converted to an explicit bswap sequence. This way we end up with a single
7580   // load and byte shuffling instead of several loads and byte shuffling.
7581   // We do not introduce illegal bswaps when zero-extending as this tends to
7582   // introduce too many arithmetic instructions.
7583   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7584       !TLI.isOperationLegal(ISD::BSWAP, VT))
7585     return SDValue();
7586 
7587   // If we need to bswap and zero extend, we have to insert a shift. Check that
7588   // it is legal.
7589   if (NeedsBswap && NeedsZext && LegalOperations &&
7590       !TLI.isOperationLegal(ISD::SHL, VT))
7591     return SDValue();
7592 
7593   // Check that a load of the wide type is both allowed and fast on the target
7594   bool Fast = false;
7595   bool Allowed =
7596       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7597                              *FirstLoad->getMemOperand(), &Fast);
7598   if (!Allowed || !Fast)
7599     return SDValue();
7600 
7601   SDValue NewLoad =
7602       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7603                      Chain, FirstLoad->getBasePtr(),
7604                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7605 
7606   // Transfer chain users from old loads to the new load.
7607   for (LoadSDNode *L : Loads)
7608     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7609 
7610   if (!NeedsBswap)
7611     return NewLoad;
7612 
7613   SDValue ShiftedLoad =
7614       NeedsZext
7615           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7616                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7617                                                    SDLoc(N), LegalOperations))
7618           : NewLoad;
7619   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7620 }
7621 
7622 // If the target has andn, bsl, or a similar bit-select instruction,
7623 // we want to unfold masked merge, with canonical pattern of:
7624 //   |        A  |  |B|
7625 //   ((x ^ y) & m) ^ y
7626 //    |  D  |
7627 // Into:
7628 //   (x & m) | (y & ~m)
7629 // If y is a constant, and the 'andn' does not work with immediates,
7630 // we unfold into a different pattern:
7631 //   ~(~x & m) & (m | y)
7632 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
7633 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
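//
// Example with 4-bit values, x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  ==  (0b0110 & 0b0110) ^ 0b1010  ==  0b1100
//   (x & m) | (y & ~m) ==  0b0100 | 0b1000             ==  0b1100
// i.e. the result takes each bit from x where m is 1 and from y where m is 0.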
7635 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7636   assert(N->getOpcode() == ISD::XOR);
7637 
7638   // Don't touch 'not' (i.e. where y = -1).
7639   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7640     return SDValue();
7641 
7642   EVT VT = N->getValueType(0);
7643 
7644   // There are 3 commutable operators in the pattern,
7645   // so we have to deal with 8 possible variants of the basic pattern.
7646   SDValue X, Y, M;
7647   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7648     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7649       return false;
7650     SDValue Xor = And.getOperand(XorIdx);
7651     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7652       return false;
7653     SDValue Xor0 = Xor.getOperand(0);
7654     SDValue Xor1 = Xor.getOperand(1);
7655     // Don't touch 'not' (i.e. where y = -1).
7656     if (isAllOnesOrAllOnesSplat(Xor1))
7657       return false;
7658     if (Other == Xor0)
7659       std::swap(Xor0, Xor1);
7660     if (Other != Xor1)
7661       return false;
7662     X = Xor0;
7663     Y = Xor1;
7664     M = And.getOperand(XorIdx ? 0 : 1);
7665     return true;
7666   };
7667 
7668   SDValue N0 = N->getOperand(0);
7669   SDValue N1 = N->getOperand(1);
7670   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7671       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7672     return SDValue();
7673 
7674   // Don't do anything if the mask is constant. This should not be reachable.
7675   // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
7677   if (isa<ConstantSDNode>(M.getNode()))
7678     return SDValue();
7679 
  // We can transform this if the target has AndNot.
7681   if (!TLI.hasAndNot(M))
7682     return SDValue();
7683 
7684   SDLoc DL(N);
7685 
7686   // If Y is a constant, check that 'andn' works with immediates.
7687   if (!TLI.hasAndNot(Y)) {
7688     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7689     // If not, we need to do a bit more work to make sure andn is still used.
7690     SDValue NotX = DAG.getNOT(DL, X, VT);
7691     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7692     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7693     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7694     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7695   }
7696 
7697   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7698   SDValue NotM = DAG.getNOT(DL, M, VT);
7699   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7700 
7701   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7702 }
7703 
7704 SDValue DAGCombiner::visitXOR(SDNode *N) {
7705   SDValue N0 = N->getOperand(0);
7706   SDValue N1 = N->getOperand(1);
7707   EVT VT = N0.getValueType();
7708 
7709   // fold vector ops
7710   if (VT.isVector()) {
7711     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7712       return FoldedVOp;
7713 
7714     // fold (xor x, 0) -> x, vector edition
7715     if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
7716       return N1;
7717     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7718       return N0;
7719   }
7720 
7721   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7722   SDLoc DL(N);
7723   if (N0.isUndef() && N1.isUndef())
7724     return DAG.getConstant(0, DL, VT);
7725 
7726   // fold (xor x, undef) -> undef
7727   if (N0.isUndef())
7728     return N0;
7729   if (N1.isUndef())
7730     return N1;
7731 
7732   // fold (xor c1, c2) -> c1^c2
7733   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7734     return C;
7735 
7736   // canonicalize constant to RHS
7737   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7738      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7739     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7740 
7741   // fold (xor x, 0) -> x
7742   if (isNullConstant(N1))
7743     return N0;
7744 
7745   if (SDValue NewSel = foldBinOpIntoSelect(N))
7746     return NewSel;
7747 
7748   // reassociate xor
7749   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7750     return RXOR;
7751 
7752   // fold !(x cc y) -> (x !cc y)
7753   unsigned N0Opcode = N0.getOpcode();
7754   SDValue LHS, RHS, CC;
7755   if (TLI.isConstTrueVal(N1.getNode()) &&
7756       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7757     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7758                                                LHS.getValueType());
7759     if (!LegalOperations ||
7760         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7761       switch (N0Opcode) {
7762       default:
7763         llvm_unreachable("Unhandled SetCC Equivalent!");
7764       case ISD::SETCC:
7765         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7766       case ISD::SELECT_CC:
7767         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7768                                N0.getOperand(3), NotCC);
7769       case ISD::STRICT_FSETCC:
7770       case ISD::STRICT_FSETCCS: {
7771         if (N0.hasOneUse()) {
7772           // FIXME Can we handle multiple uses? Could we token factor the chain
7773           // results from the new/old setcc?
7774           SDValue SetCC =
7775               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7776                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7777           CombineTo(N, SetCC);
7778           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7779           recursivelyDeleteUnusedNodes(N0.getNode());
7780           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7781         }
7782         break;
7783       }
7784       }
7785     }
7786   }
7787 
7788   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7789   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7790       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7791     SDValue V = N0.getOperand(0);
7792     SDLoc DL0(N0);
7793     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7794                     DAG.getConstant(1, DL0, V.getValueType()));
7795     AddToWorklist(V.getNode());
7796     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7797   }
7798 
7799   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7800   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7801       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7802     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7803     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7804       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7805       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7806       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7807       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7808       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7809     }
7810   }
7811   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7812   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7813       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7814     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7815     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7816       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7817       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7818       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7819       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7820       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7821     }
7822   }
7823 
7824   // fold (not (neg x)) -> (add X, -1)
7825   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7826   // Y is a constant or the subtract has a single use.
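  // (This uses the two's complement identity ~A == -A - 1, so
  //  ~(0 - X) == -(0 - X) - 1 == X - 1 == add(X, -1).)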
7827   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7828       isNullConstant(N0.getOperand(0))) {
7829     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7830                        DAG.getAllOnesConstant(DL, VT));
7831   }
7832 
7833   // fold (not (add X, -1)) -> (neg X)
7834   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7835       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7836     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7837                        N0.getOperand(0));
7838   }
7839 
7840   // fold (xor (and x, y), y) -> (and (not x), y)
7841   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7842     SDValue X = N0.getOperand(0);
7843     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7844     AddToWorklist(NotX.getNode());
7845     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7846   }
7847 
7848   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7849     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7850     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7851     unsigned BitWidth = VT.getScalarSizeInBits();
7852     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
      // shift has been simplified to undef.
7855       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7856       if (ShiftAmt < BitWidth) {
7857         APInt Ones = APInt::getAllOnesValue(BitWidth);
7858         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7859         if (XorC->getAPIntValue() == Ones) {
7860           // If the xor constant is a shifted -1, do a 'not' before the shift:
7861           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7862           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7863           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7864           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7865         }
7866       }
7867     }
7868   }
7869 
7870   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7871   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7872     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7873     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7874     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7875       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7876       SDValue S0 = S.getOperand(0);
7877       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7878         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7879           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7880             return DAG.getNode(ISD::ABS, DL, VT, S0);
7881     }
7882   }
7883 
7884   // fold (xor x, x) -> 0
7885   if (N0 == N1)
7886     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7887 
7888   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7889   // Here is a concrete example of this equivalence:
7890   // i16   x ==  14
7891   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7892   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7893   //
7894   // =>
7895   //
7896   // i16     ~1      == 0b1111111111111110
7897   // i16 rol(~1, 14) == 0b1011111111111111
7898   //
7899   // Some additional tips to help conceptualize this transform:
7900   // - Try to see the operation as placing a single zero in a value of all ones.
7901   // - There exists no value for x which would allow the result to contain zero.
7902   // - Values of x larger than the bitwidth are undefined and do not require a
7903   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
7905   // A rotate left of ~1 is a nice way of achieving the desired result.
7906   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7907       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7908     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7909                        N0.getOperand(1));
7910   }
7911 
7912   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7913   if (N0Opcode == N1.getOpcode())
7914     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7915       return V;
7916 
7917   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7918   if (SDValue MM = unfoldMaskedMerge(N))
7919     return MM;
7920 
7921   // Simplify the expression using non-local knowledge.
7922   if (SimplifyDemandedBits(SDValue(N, 0)))
7923     return SDValue(N, 0);
7924 
7925   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7926     return Combined;
7927 
7928   return SDValue();
7929 }
7930 
7931 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7932 /// shift-by-constant operand with identical opcode, we may be able to convert
7933 /// that into 2 independent shifts followed by the logic op. This is a
7934 /// throughput improvement.
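/// For example:
///   shl (and (shl X, 2), Y), 3 --> and (shl X, 5), (shl Y, 3)
/// The two new shifts are independent of each other and can execute in
/// parallel.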
7935 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7936   // Match a one-use bitwise logic op.
7937   SDValue LogicOp = Shift->getOperand(0);
7938   if (!LogicOp.hasOneUse())
7939     return SDValue();
7940 
7941   unsigned LogicOpcode = LogicOp.getOpcode();
7942   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7943       LogicOpcode != ISD::XOR)
7944     return SDValue();
7945 
7946   // Find a matching one-use shift by constant.
7947   unsigned ShiftOpcode = Shift->getOpcode();
7948   SDValue C1 = Shift->getOperand(1);
7949   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7950   assert(C1Node && "Expected a shift with constant operand");
7951   const APInt &C1Val = C1Node->getAPIntValue();
7952   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7953                              const APInt *&ShiftAmtVal) {
7954     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7955       return false;
7956 
7957     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7958     if (!ShiftCNode)
7959       return false;
7960 
7961     // Capture the shifted operand and shift amount value.
7962     ShiftOp = V.getOperand(0);
7963     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7964 
7965     // Shift amount types do not have to match their operand type, so check that
7966     // the constants are the same width.
7967     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7968       return false;
7969 
7970     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7971     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7972       return false;
7973 
7974     return true;
7975   };
7976 
7977   // Logic ops are commutative, so check each operand for a match.
7978   SDValue X, Y;
7979   const APInt *C0Val;
7980   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7981     Y = LogicOp.getOperand(1);
7982   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7983     Y = LogicOp.getOperand(0);
7984   else
7985     return SDValue();
7986 
7987   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7988   SDLoc DL(Shift);
7989   EVT VT = Shift->getValueType(0);
7990   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7991   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7992   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7993   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7994   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7995 }
7996 
7997 /// Handle transforms common to the three shifts, when the shift amount is a
7998 /// constant.
7999 /// We are looking for: (shift being one of shl/sra/srl)
8000 ///   shift (binop X, C0), C1
8001 /// And want to transform into:
8002 ///   binop (shift X, C1), (shift C0, C1)
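/// For example:
///   shl (add X, 7), 2 --> add (shl X, 2), 28
/// since (X + 7) << 2 == (X << 2) + (7 << 2).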
8003 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
8004   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
8005 
8006   // Do not turn a 'not' into a regular xor.
8007   if (isBitwiseNot(N->getOperand(0)))
8008     return SDValue();
8009 
8010   // The inner binop must be one-use, since we want to replace it.
8011   SDValue LHS = N->getOperand(0);
8012   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
8013     return SDValue();
8014 
8015   // TODO: This is limited to early combining because it may reveal regressions
8016   //       otherwise. But since we just checked a target hook to see if this is
8017   //       desirable, that should have filtered out cases where this interferes
8018   //       with some other pattern matching.
8019   if (!LegalTypes)
8020     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
8021       return R;
8022 
8023   // We want to pull some binops through shifts, so that we have (and (shift))
8024   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
8025   // thing happens with address calculations, so it's important to canonicalize
8026   // it.
8027   switch (LHS.getOpcode()) {
8028   default:
8029     return SDValue();
8030   case ISD::OR:
8031   case ISD::XOR:
8032   case ISD::AND:
8033     break;
8034   case ISD::ADD:
8035     if (N->getOpcode() != ISD::SHL)
8036       return SDValue(); // only shl(add) not sr[al](add).
8037     break;
8038   }
8039 
  // We also require the RHS of the binop to be a non-opaque constant.
8041   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
8042   if (!BinOpCst)
8043     return SDValue();
8044 
  // FIXME: this is disabled unless the input to the binop is a shift by a
  // constant or is a copy/select. Enable this in other cases once we figure
  // out when it is actually profitable.
8048   SDValue BinOpLHSVal = LHS.getOperand(0);
8049   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
8050                             BinOpLHSVal.getOpcode() == ISD::SRA ||
8051                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
8052                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
8053   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
8054                         BinOpLHSVal.getOpcode() == ISD::SELECT;
8055 
8056   if (!IsShiftByConstant && !IsCopyOrSelect)
8057     return SDValue();
8058 
8059   if (IsCopyOrSelect && N->hasOneUse())
8060     return SDValue();
8061 
8062   // Fold the constants, shifting the binop RHS by the shift amount.
8063   SDLoc DL(N);
8064   EVT VT = N->getValueType(0);
8065   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
8066                                N->getOperand(1));
8067   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
8068 
8069   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
8070                                  N->getOperand(1));
8071   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
8072 }
8073 
8074 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
8075   assert(N->getOpcode() == ISD::TRUNCATE);
8076   assert(N->getOperand(0).getOpcode() == ISD::AND);
8077 
8078   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
8079   EVT TruncVT = N->getValueType(0);
8080   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
8081       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
8082     SDValue N01 = N->getOperand(0).getOperand(1);
8083     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
8084       SDLoc DL(N);
8085       SDValue N00 = N->getOperand(0).getOperand(0);
8086       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
8087       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
8088       AddToWorklist(Trunc00.getNode());
8089       AddToWorklist(Trunc01.getNode());
8090       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
8091     }
8092   }
8093 
8094   return SDValue();
8095 }
8096 
8097 SDValue DAGCombiner::visitRotate(SDNode *N) {
8098   SDLoc dl(N);
8099   SDValue N0 = N->getOperand(0);
8100   SDValue N1 = N->getOperand(1);
8101   EVT VT = N->getValueType(0);
8102   unsigned Bitsize = VT.getScalarSizeInBits();
8103 
8104   // fold (rot x, 0) -> x
8105   if (isNullOrNullSplat(N1))
8106     return N0;
8107 
8108   // fold (rot x, c) -> x iff (c % BitSize) == 0
8109   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
8110     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
8111     if (DAG.MaskedValueIsZero(N1, ModuloMask))
8112       return N0;
8113   }
8114 
8115   // fold (rot x, c) -> (rot x, c % BitSize)
8116   bool OutOfRange = false;
8117   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
8118     OutOfRange |= C->getAPIntValue().uge(Bitsize);
8119     return true;
8120   };
8121   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
8122     EVT AmtVT = N1.getValueType();
8123     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
8124     if (SDValue Amt =
8125             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
8126       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
8127   }
8128 
8129   // rot i16 X, 8 --> bswap X
8130   auto *RotAmtC = isConstOrConstSplat(N1);
8131   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
8132       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
8133     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
8134 
8135   // Simplify the operands using demanded-bits information.
8136   if (SimplifyDemandedBits(SDValue(N, 0)))
8137     return SDValue(N, 0);
8138 
8139   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
8140   if (N1.getOpcode() == ISD::TRUNCATE &&
8141       N1.getOperand(0).getOpcode() == ISD::AND) {
8142     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8143       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
8144   }
8145 
8146   unsigned NextOp = N0.getOpcode();
  // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
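  // For example, with i32:
  //   rotl (rotl x, 5), 7 --> rotl x, 12
  //   rotl (rotr x, 5), 7 --> rotl x, 2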
8148   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
8149     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
8150     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
8151     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
8152       EVT ShiftVT = C1->getValueType(0);
8153       bool SameSide = (N->getOpcode() == NextOp);
8154       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
8155       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
8156               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
8157         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
8158         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8159             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8160         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8161                            CombinedShiftNorm);
8162       }
8163     }
8164   }
8165   return SDValue();
8166 }
8167 
8168 SDValue DAGCombiner::visitSHL(SDNode *N) {
8169   SDValue N0 = N->getOperand(0);
8170   SDValue N1 = N->getOperand(1);
8171   if (SDValue V = DAG.simplifyShift(N0, N1))
8172     return V;
8173 
8174   EVT VT = N0.getValueType();
8175   EVT ShiftVT = N1.getValueType();
8176   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8177 
8178   // fold vector ops
8179   if (VT.isVector()) {
8180     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8181       return FoldedVOp;
8182 
8183     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces an all-ones true value then:
8185     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8186     if (N1CV && N1CV->isConstant()) {
8187       if (N0.getOpcode() == ISD::AND) {
8188         SDValue N00 = N0->getOperand(0);
8189         SDValue N01 = N0->getOperand(1);
8190         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8191 
8192         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8193             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8194                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8195           if (SDValue C =
8196                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8197             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8198         }
8199       }
8200     }
8201   }
8202 
8203   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8204 
8205   // fold (shl c1, c2) -> c1<<c2
8206   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8207     return C;
8208 
8209   if (SDValue NewSel = foldBinOpIntoSelect(N))
8210     return NewSel;
8211 
8212   // if (shl x, c) is known to be zero, return 0
8213   if (DAG.MaskedValueIsZero(SDValue(N, 0),
8214                             APInt::getAllOnesValue(OpSizeInBits)))
8215     return DAG.getConstant(0, SDLoc(N), VT);
8216 
8217   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8218   if (N1.getOpcode() == ISD::TRUNCATE &&
8219       N1.getOperand(0).getOpcode() == ISD::AND) {
8220     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8221       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8222   }
8223 
8224   if (SimplifyDemandedBits(SDValue(N, 0)))
8225     return SDValue(N, 0);
8226 
8227   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
8228   if (N0.getOpcode() == ISD::SHL) {
8229     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8230                                           ConstantSDNode *RHS) {
8231       APInt c1 = LHS->getAPIntValue();
8232       APInt c2 = RHS->getAPIntValue();
8233       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8234       return (c1 + c2).uge(OpSizeInBits);
8235     };
8236     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8237       return DAG.getConstant(0, SDLoc(N), VT);
8238 
8239     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8240                                        ConstantSDNode *RHS) {
8241       APInt c1 = LHS->getAPIntValue();
8242       APInt c2 = RHS->getAPIntValue();
8243       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8244       return (c1 + c2).ult(OpSizeInBits);
8245     };
8246     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8247       SDLoc DL(N);
8248       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8249       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8250     }
8251   }
8252 
8253   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8254   // For this to be valid, the second form must not preserve any of the bits
8255   // that are shifted out by the inner shift in the first form.  This means
8256   // the outer shift size must be >= the number of bits added by the ext.
8257   // As a corollary, we don't care what kind of ext it is.
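  // For example, with an i8 source and i16 result (the ext adds 8 bits):
  //   shl (zext i8 (shl x, 3) to i16), 9 --> shl (zext i8 x to i16), 12
  // The outer shift amount (9) is >= the 8 bits added by the ext, so none of
  // the bits discarded by the inner shl can survive in the result.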
8258   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8259        N0.getOpcode() == ISD::ANY_EXTEND ||
8260        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8261       N0.getOperand(0).getOpcode() == ISD::SHL) {
8262     SDValue N0Op0 = N0.getOperand(0);
8263     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8264     EVT InnerVT = N0Op0.getValueType();
8265     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8266 
8267     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8268                                                          ConstantSDNode *RHS) {
8269       APInt c1 = LHS->getAPIntValue();
8270       APInt c2 = RHS->getAPIntValue();
8271       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8272       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8273              (c1 + c2).uge(OpSizeInBits);
8274     };
8275     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8276                                   /*AllowUndefs*/ false,
8277                                   /*AllowTypeMismatch*/ true))
8278       return DAG.getConstant(0, SDLoc(N), VT);
8279 
8280     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8281                                                       ConstantSDNode *RHS) {
8282       APInt c1 = LHS->getAPIntValue();
8283       APInt c2 = RHS->getAPIntValue();
8284       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8285       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8286              (c1 + c2).ult(OpSizeInBits);
8287     };
8288     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8289                                   /*AllowUndefs*/ false,
8290                                   /*AllowTypeMismatch*/ true)) {
8291       SDLoc DL(N);
8292       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8293       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8294       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8295       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8296     }
8297   }
8298 
8299   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8300   // Only fold this if the inner zext has no other uses to avoid increasing
8301   // the total number of instructions.
8302   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8303       N0.getOperand(0).getOpcode() == ISD::SRL) {
8304     SDValue N0Op0 = N0.getOperand(0);
8305     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8306 
8307     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8308       APInt c1 = LHS->getAPIntValue();
8309       APInt c2 = RHS->getAPIntValue();
8310       zeroExtendToMatch(c1, c2);
8311       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8312     };
8313     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8314                                   /*AllowUndefs*/ false,
8315                                   /*AllowTypeMismatch*/ true)) {
8316       SDLoc DL(N);
8317       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8318       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8319       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8320       AddToWorklist(NewSHL.getNode());
8321       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8322     }
8323   }
8324 
8325   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8326   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8327   // TODO - support non-uniform vector shift amounts.
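  // For example:
  //   shl (srl exact X, 3), 5 --> shl X, 2
  //   shl (srl exact X, 5), 3 --> srl X, 2
  // The 'exact' flag guarantees the srl shifted out only zero bits, so no
  // information is lost when the shifts are merged.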
8328   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8329       N0->getFlags().hasExact()) {
8330     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8331       uint64_t C1 = N0C1->getZExtValue();
8332       uint64_t C2 = N1C->getZExtValue();
8333       SDLoc DL(N);
8334       if (C1 <= C2)
8335         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8336                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8337       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8338                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8339     }
8340   }
8341 
  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
8344   // Only fold this if the inner shift has no other uses -- if it does, folding
8345   // this will increase the total number of instructions.
8346   // TODO - drop hasOneUse requirement if c1 == c2?
8347   // TODO - support non-uniform vector shift amounts.
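  // For example, with i8:
  //   shl (srl x, 3), 5 --> and (shl x, 2), 0b11100000
  //   shl (srl x, 5), 3 --> and (srl x, 2), 0b00111000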
8348   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8349       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8350     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8351       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8352         uint64_t c1 = N0C1->getZExtValue();
8353         uint64_t c2 = N1C->getZExtValue();
8354         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8355         SDValue Shift;
8356         if (c2 > c1) {
8357           Mask <<= c2 - c1;
8358           SDLoc DL(N);
8359           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8360                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8361         } else {
8362           Mask.lshrInPlace(c1 - c2);
8363           SDLoc DL(N);
8364           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8365                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8366         }
8367         SDLoc DL(N0);
8368         return DAG.getNode(ISD::AND, DL, VT, Shift,
8369                            DAG.getConstant(Mask, DL, VT));
8370       }
8371     }
8372   }
8373 
8374   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
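  // For example, with i8: shl (sra x, 3), 3 --> and x, 0b11111000, because
  // the shift pair only clears the low 3 bits.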
8375   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8376       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8377     SDLoc DL(N);
8378     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8379     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8380     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8381   }
8382 
8383   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8384   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
  // This is a variant of the fold done on multiply, except that a mul by a
  // power of 2 is turned into a shift.
8387   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8388       N0.getNode()->hasOneUse() &&
8389       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8390       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8391       TLI.isDesirableToCommuteWithShift(N, Level)) {
8392     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8393     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8394     AddToWorklist(Shl0.getNode());
8395     AddToWorklist(Shl1.getNode());
8396     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8397   }
8398 
8399   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
8400   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8401       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8402       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8403     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8404     if (isConstantOrConstantVector(Shl))
8405       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8406   }
8407 
8408   if (N1C && !N1C->isOpaque())
8409     if (SDValue NewSHL = visitShiftByConstant(N))
8410       return NewSHL;
8411 
8412   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8413   if (N0.getOpcode() == ISD::VSCALE)
8414     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8415       const APInt &C0 = N0.getConstantOperandAPInt(0);
8416       const APInt &C1 = NC1->getAPIntValue();
8417       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8418     }
8419 
8420   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
8421   APInt ShlVal;
8422   if (N0.getOpcode() == ISD::STEP_VECTOR)
8423     if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
8424       const APInt &C0 = N0.getConstantOperandAPInt(0);
8425       EVT SVT = N0.getOperand(0).getValueType();
8426       SDValue NewStep = DAG.getConstant(
8427           C0 << ShlVal.sextOrTrunc(SVT.getSizeInBits()), SDLoc(N), SVT);
8428       return DAG.getStepVector(SDLoc(N), VT, NewStep);
8429     }
8430 
8431   return SDValue();
8432 }
8433 
8434 // Transform a right shift of a multiply into a multiply-high.
8435 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
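// For example, with i8 -> i16 and a == 100, b == 3: the widened product is
// 300 == 0x012C, and srl 0x012C, 8 == 0x01, which matches mulhu(100, 3) == 1.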
8438 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8439                                   const TargetLowering &TLI) {
8440   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8441          "SRL or SRA node is required here!");
8442 
8443   // Check the shift amount. Proceed with the transformation if the shift
8444   // amount is constant.
8445   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8446   if (!ShiftAmtSrc)
8447     return SDValue();
8448 
8449   SDLoc DL(N);
8450 
8451   // The operation feeding into the shift must be a multiply.
8452   SDValue ShiftOperand = N->getOperand(0);
8453   if (ShiftOperand.getOpcode() != ISD::MUL)
8454     return SDValue();
8455 
8456   // Both operands must be equivalent extend nodes.
8457   SDValue LeftOp = ShiftOperand.getOperand(0);
8458   SDValue RightOp = ShiftOperand.getOperand(1);
8459   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8460   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8461 
8462   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8463     return SDValue();
8464 
8465   EVT WideVT1 = LeftOp.getValueType();
8466   EVT WideVT2 = RightOp.getValueType();
8467   (void)WideVT2;
  // The wide types of the two extended operands must match.
8469   assert((WideVT1 == WideVT2) &&
8470          "Cannot have a multiply node with two different operand types.");
8471 
8472   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8473   // Check that the two extend nodes are the same type.
  if (NarrowVT != RightOp.getOperand(0).getValueType())
8475     return SDValue();
8476 
8477   // Proceed with the transformation if the wide type is twice as large
8478   // as the narrow type.
8479   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8480   if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8481     return SDValue();
8482 
8483   // Check the shift amount with the narrow type size.
8484   // Proceed with the transformation if the shift amount is the width
8485   // of the narrow type.
8486   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8487   if (ShiftAmt != NarrowVTSize)
8488     return SDValue();
8489 
  // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8492   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8493 
8494   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8495   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8496     return SDValue();
8497 
8498   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8499                                RightOp.getOperand(0));
8500   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8501                                      : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8502 }
8503 
8504 SDValue DAGCombiner::visitSRA(SDNode *N) {
8505   SDValue N0 = N->getOperand(0);
8506   SDValue N1 = N->getOperand(1);
8507   if (SDValue V = DAG.simplifyShift(N0, N1))
8508     return V;
8509 
8510   EVT VT = N0.getValueType();
8511   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8512 
8513   // Arithmetic shifting an all-sign-bit value is a no-op.
8514   // fold (sra 0, x) -> 0
8515   // fold (sra -1, x) -> -1
8516   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8517     return N0;
8518 
8519   // fold vector ops
8520   if (VT.isVector())
8521     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8522       return FoldedVOp;
8523 
8524   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8525 
  // fold (sra c1, c2) -> c1 >>s c2
8527   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8528     return C;
8529 
8530   if (SDValue NewSel = foldBinOpIntoSelect(N))
8531     return NewSel;
8532 
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
  // supports sext_inreg.
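  // For example, with i32: sra (shl x, 24), 24 --> sext_inreg x, i8.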
8535   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8536     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8537     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8538     if (VT.isVector())
8539       ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT,
8540                                VT.getVectorElementCount());
8541     if (!LegalOperations ||
8542         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8543         TargetLowering::Legal)
8544       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8545                          N0.getOperand(0), DAG.getValueType(ExtVT));
8546     // Even if we can't convert to sext_inreg, we might be able to remove
8547     // this shift pair if the input is already sign extended.
8548     if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue())
8549       return N0.getOperand(0);
8550   }
8551 
8552   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8553   // clamp (add c1, c2) to max shift.
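  // For example, with i32:
  //   sra (sra x, 10), 12 --> sra x, 22
  //   sra (sra x, 20), 20 --> sra x, 31 (the sum 40 is clamped to 31)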
8554   if (N0.getOpcode() == ISD::SRA) {
8555     SDLoc DL(N);
8556     EVT ShiftVT = N1.getValueType();
8557     EVT ShiftSVT = ShiftVT.getScalarType();
8558     SmallVector<SDValue, 16> ShiftValues;
8559 
8560     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8561       APInt c1 = LHS->getAPIntValue();
8562       APInt c2 = RHS->getAPIntValue();
8563       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8564       APInt Sum = c1 + c2;
8565       unsigned ShiftSum =
8566           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8567       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8568       return true;
8569     };
8570     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8571       SDValue ShiftValue;
8572       if (VT.isVector())
8573         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8574       else
8575         ShiftValue = ShiftValues[0];
8576       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8577     }
8578   }
8579 
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, this form is likely to result in
  // better code.
8585   if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts: N01C == m and
    // N1C == result_size - n.
8587     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8588     if (N01C) {
8589       LLVMContext &Ctx = *DAG.getContext();
8590       // Determine what the truncate's result bitsize and type would be.
8591       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8592 
8593       if (VT.isVector())
8594         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8595 
8596       // Determine the residual right-shift amount.
8597       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8598 
      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncate-to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
8603       if ((ShiftAmt > 0) &&
8604           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8605           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8606           TLI.isTruncateFree(VT, TruncVT)) {
8607         SDLoc DL(N);
8608         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8609             getShiftAmountTy(N0.getOperand(0).getValueType()));
8610         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8611                                     N0.getOperand(0), Amt);
8612         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8613                                     Shift);
8614         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8615                            N->getValueType(0), Trunc);
8616       }
8617     }
8618   }
8619 
8620   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8621   //   sra (add (shl X, N1C), AddC), N1C -->
8622   //   sext (add (trunc X to (width - N1C)), AddC')
8623   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8624       N0.getOperand(0).getOpcode() == ISD::SHL &&
8625       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8626     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8627       SDValue Shl = N0.getOperand(0);
8628       // Determine what the truncate's type would be and ask the target if that
8629       // is a free operation.
8630       LLVMContext &Ctx = *DAG.getContext();
8631       unsigned ShiftAmt = N1C->getZExtValue();
8632       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8633       if (VT.isVector())
8634         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
8635 
8636       // TODO: The simple type check probably belongs in the default hook
8637       //       implementation and/or target-specific overrides (because
8638       //       non-simple types likely require masking when legalized), but that
8639       //       restriction may conflict with other transforms.
8640       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8641           TLI.isTruncateFree(VT, TruncVT)) {
8642         SDLoc DL(N);
8643         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8644         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8645                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8646         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8647         return DAG.getSExtOrTrunc(Add, DL, VT);
8648       }
8649     }
8650   }
8651 
8652   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8653   if (N1.getOpcode() == ISD::TRUNCATE &&
8654       N1.getOperand(0).getOpcode() == ISD::AND) {
8655     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8656       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8657   }
8658 
8659   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8660   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8661   //      if c1 is equal to the number of bits the trunc removes
8662   // TODO - support non-uniform vector shift amounts.
8663   if (N0.getOpcode() == ISD::TRUNCATE &&
8664       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8665        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8666       N0.getOperand(0).hasOneUse() &&
8667       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8668     SDValue N0Op0 = N0.getOperand(0);
8669     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8670       EVT LargeVT = N0Op0.getValueType();
8671       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8672       if (LargeShift->getAPIntValue() == TruncBits) {
8673         SDLoc DL(N);
8674         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8675                                       getShiftAmountTy(LargeVT));
8676         SDValue SRA =
8677             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8678         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8679       }
8680     }
8681   }
8682 
8683   // Simplify, based on bits shifted out of the LHS.
8684   if (SimplifyDemandedBits(SDValue(N, 0)))
8685     return SDValue(N, 0);
8686 
8687   // If the sign bit is known to be zero, switch this to a SRL.
8688   if (DAG.SignBitIsZero(N0))
8689     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8690 
8691   if (N1C && !N1C->isOpaque())
8692     if (SDValue NewSRA = visitShiftByConstant(N))
8693       return NewSRA;
8694 
8695   // Try to transform this shift into a multiply-high if
8696   // it matches the appropriate pattern detected in combineShiftToMULH.
8697   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8698     return MULH;
8699 
8700   return SDValue();
8701 }
8702 
8703 SDValue DAGCombiner::visitSRL(SDNode *N) {
8704   SDValue N0 = N->getOperand(0);
8705   SDValue N1 = N->getOperand(1);
8706   if (SDValue V = DAG.simplifyShift(N0, N1))
8707     return V;
8708 
8709   EVT VT = N0.getValueType();
8710   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8711 
8712   // fold vector ops
8713   if (VT.isVector())
8714     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8715       return FoldedVOp;
8716 
8717   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8718 
8719   // fold (srl c1, c2) -> c1 >>u c2
8720   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8721     return C;
8722 
8723   if (SDValue NewSel = foldBinOpIntoSelect(N))
8724     return NewSel;
8725 
8726   // if (srl x, c) is known to be zero, return 0
8727   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8728                                    APInt::getAllOnesValue(OpSizeInBits)))
8729     return DAG.getConstant(0, SDLoc(N), VT);
8730 
8731   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8732   if (N0.getOpcode() == ISD::SRL) {
8733     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8734                                           ConstantSDNode *RHS) {
8735       APInt c1 = LHS->getAPIntValue();
8736       APInt c2 = RHS->getAPIntValue();
8737       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8738       return (c1 + c2).uge(OpSizeInBits);
8739     };
8740     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8741       return DAG.getConstant(0, SDLoc(N), VT);
8742 
8743     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8744                                        ConstantSDNode *RHS) {
8745       APInt c1 = LHS->getAPIntValue();
8746       APInt c2 = RHS->getAPIntValue();
8747       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8748       return (c1 + c2).ult(OpSizeInBits);
8749     };
8750     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8751       SDLoc DL(N);
8752       EVT ShiftVT = N1.getValueType();
8753       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8754       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8755     }
8756   }
8757 
8758   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8759       N0.getOperand(0).getOpcode() == ISD::SRL) {
8760     SDValue InnerShift = N0.getOperand(0);
8761     // TODO - support non-uniform vector shift amounts.
8762     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8763       uint64_t c1 = N001C->getZExtValue();
8764       uint64_t c2 = N1C->getZExtValue();
8765       EVT InnerShiftVT = InnerShift.getValueType();
8766       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8767       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8768       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 equals the inner shift's size.
8770       if (c1 + OpSizeInBits == InnerShiftSize) {
8771         SDLoc DL(N);
8772         if (c1 + c2 >= InnerShiftSize)
8773           return DAG.getConstant(0, DL, VT);
8774         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8775         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8776                                        InnerShift.getOperand(0), NewShiftAmt);
8777         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8778       }
8779       // In the more general case, we can clear the high bits after the shift:
8780       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8781       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8782           c1 + c2 < InnerShiftSize) {
8783         SDLoc DL(N);
8784         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8785         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8786                                        InnerShift.getOperand(0), NewShiftAmt);
8787         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8788                                                             OpSizeInBits - c2),
8789                                        DL, InnerShiftVT);
8790         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8791         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8792       }
8793     }
8794   }
8795 
8796   // fold (srl (shl x, c), c) -> (and x, cst2)
8797   // TODO - (srl (shl x, c1), c2).
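  // For example, with i8: srl (shl x, 3), 3 --> and x, 0b00011111.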
8798   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8799       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8800     SDLoc DL(N);
8801     SDValue Mask =
8802         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8803     AddToWorklist(Mask.getNode());
8804     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8805   }
8806 
8807   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8808   // TODO - support non-uniform vector shift amounts.
8809   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8810     // Shifting in all undef bits?
8811     EVT SmallVT = N0.getOperand(0).getValueType();
8812     unsigned BitSize = SmallVT.getScalarSizeInBits();
8813     if (N1C->getAPIntValue().uge(BitSize))
8814       return DAG.getUNDEF(VT);
8815 
8816     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8817       uint64_t ShiftAmt = N1C->getZExtValue();
8818       SDLoc DL0(N0);
8819       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8820                                        N0.getOperand(0),
8821                           DAG.getConstant(ShiftAmt, DL0,
8822                                           getShiftAmountTy(SmallVT)));
8823       AddToWorklist(SmallShift.getNode());
8824       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8825       SDLoc DL(N);
8826       return DAG.getNode(ISD::AND, DL, VT,
8827                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8828                          DAG.getConstant(Mask, DL, VT));
8829     }
8830   }
8831 
8832   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8833   // bit, which is unmodified by sra.
8834   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8835     if (N0.getOpcode() == ISD::SRA)
8836       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8837   }
8838 
  // fold (srl (ctlz x), log2(bitwidth)): this computes (x == 0 ? 1 : 0), so
  // simplify it based on what is known about the bits of x.
8840   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8841       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8842     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8843 
8844     // If any of the input bits are KnownOne, then the input couldn't be all
8845     // zeros, thus the result of the srl will always be zero.
8846     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8847 
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is the bitwidth and the result of the shift is
    // one.
8850     APInt UnknownBits = ~Known.Zero;
8851     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8852 
8853     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8854     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
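      // For example, with i32 and UnknownBits == 0b100 (only bit 2 can be
      // set): srl (ctlz x), 5 is 1 when x == 0 and 0 when x == 4, which is
      // exactly (srl x, 2) ^ 1.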
8859       unsigned ShAmt = UnknownBits.countTrailingZeros();
8860       SDValue Op = N0.getOperand(0);
8861 
8862       if (ShAmt) {
8863         SDLoc DL(N0);
8864         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8865                   DAG.getConstant(ShAmt, DL,
8866                                   getShiftAmountTy(Op.getValueType())));
8867         AddToWorklist(Op.getNode());
8868       }
8869 
8870       SDLoc DL(N);
8871       return DAG.getNode(ISD::XOR, DL, VT,
8872                          Op, DAG.getConstant(1, DL, VT));
8873     }
8874   }
8875 
8876   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8877   if (N1.getOpcode() == ISD::TRUNCATE &&
8878       N1.getOperand(0).getOpcode() == ISD::AND) {
8879     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8880       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8881   }
8882 
8883   // fold operands of srl based on knowledge that the low bits are not
8884   // demanded.
8885   if (SimplifyDemandedBits(SDValue(N, 0)))
8886     return SDValue(N, 0);
8887 
8888   if (N1C && !N1C->isOpaque())
8889     if (SDValue NewSRL = visitShiftByConstant(N))
8890       return NewSRL;
8891 
8892   // Attempt to convert a srl of a load into a narrower zero-extending load.
8893   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8894     return NarrowLoad;
8895 
8896   // Here is a common situation. We want to optimize:
8897   //
8898   //   %a = ...
8899   //   %b = and i32 %a, 2
8900   //   %c = srl i32 %b, 1
8901   //   brcond i32 %c ...
8902   //
8903   // into
8904   //
8905   //   %a = ...
8906   //   %b = and %a, 2
8907   //   %c = setcc eq %b, 0
8908   //   brcond %c ...
8909   //
  // However, once the source operand of the SRL has been optimized into an
  // AND, the SRL itself may not be optimized further. Look for the BRCOND
  // user and add it to the worklist.
8913   if (N->hasOneUse()) {
8914     SDNode *Use = *N->use_begin();
8915     if (Use->getOpcode() == ISD::BRCOND)
8916       AddToWorklist(Use);
8917     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
8919       Use = *Use->use_begin();
8920       if (Use->getOpcode() == ISD::BRCOND)
8921         AddToWorklist(Use);
8922     }
8923   }
8924 
8925   // Try to transform this shift into a multiply-high if
8926   // it matches the appropriate pattern detected in combineShiftToMULH.
8927   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8928     return MULH;
8929 
8930   return SDValue();
8931 }
8932 
8933 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8934   EVT VT = N->getValueType(0);
8935   SDValue N0 = N->getOperand(0);
8936   SDValue N1 = N->getOperand(1);
8937   SDValue N2 = N->getOperand(2);
8938   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8939   unsigned BitWidth = VT.getScalarSizeInBits();
8940 
8941   // fold (fshl N0, N1, 0) -> N0
8942   // fold (fshr N0, N1, 0) -> N1
8943   if (isPowerOf2_32(BitWidth))
8944     if (DAG.MaskedValueIsZero(
8945             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8946       return IsFSHL ? N0 : N1;
8947 
8948   auto IsUndefOrZero = [](SDValue V) {
8949     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8950   };
8951 
8952   // TODO - support non-uniform vector shift amounts.
8953   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8954     EVT ShAmtTy = N2.getValueType();
8955 
8956     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8957     if (Cst->getAPIntValue().uge(BitWidth)) {
8958       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8959       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8960                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8961     }
8962 
8963     unsigned ShAmt = Cst->getZExtValue();
8964     if (ShAmt == 0)
8965       return IsFSHL ? N0 : N1;
8966 
8967     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8968     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8969     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8970     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
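    // These follow from the defining identities
    //   fshl(N0, N1, C) == (N0 << C)        | (N1 >>u (BW - C))
    //   fshr(N0, N1, C) == (N0 << (BW - C)) | (N1 >>u C)
    // with one half of the concatenation known to contribute no bits.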
8971     if (IsUndefOrZero(N0))
8972       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8973                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8974                                          SDLoc(N), ShAmtTy));
8975     if (IsUndefOrZero(N1))
8976       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8977                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8978                                          SDLoc(N), ShAmtTy));
8979 
8980     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8981     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8982     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8984     // TODO - permit LHS EXTLOAD if extensions are shifted out.
8985     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8986         !DAG.getDataLayout().isBigEndian()) {
8987       auto *LHS = dyn_cast<LoadSDNode>(N0);
8988       auto *RHS = dyn_cast<LoadSDNode>(N1);
8989       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8990           LHS->getAddressSpace() == RHS->getAddressSpace() &&
8991           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8992           ISD::isNON_EXTLoad(LHS)) {
8993         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8994           SDLoc DL(RHS);
8995           uint64_t PtrOff =
8996               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8997           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8998           bool Fast = false;
8999           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
9000                                      RHS->getAddressSpace(), NewAlign,
9001                                      RHS->getMemOperand()->getFlags(), &Fast) &&
9002               Fast) {
9003             SDValue NewPtr = DAG.getMemBasePlusOffset(
9004                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
9005             AddToWorklist(NewPtr.getNode());
9006             SDValue Load = DAG.getLoad(
9007                 VT, DL, RHS->getChain(), NewPtr,
9008                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9009                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
9010             // Replace the old load's chain with the new load's chain.
9011             WorklistRemover DeadNodes(*this);
9012             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
9013             return Load;
9014           }
9015         }
9016       }
9017     }
9018   }
9019 
9020   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
9021   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
9023   // TODO: when is it worth doing SUB(BW, N2) as well?
9024   if (isPowerOf2_32(BitWidth)) {
9025     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
9026     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9027       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
9028     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
9029       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
9030   }
9031 
9032   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
9033   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the
  // funnel shift is legal as well, we might be better off avoiding the
  // non-constant (BW - N2).
9036   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
9037   if (N0 == N1 && hasOperation(RotOpc, VT))
9038     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
9039 
9040   // Simplify, based on bits shifted out of N0/N1.
9041   if (SimplifyDemandedBits(SDValue(N, 0)))
9042     return SDValue(N, 0);
9043 
9044   return SDValue();
9045 }
9046 
9047 SDValue DAGCombiner::visitABS(SDNode *N) {
9048   SDValue N0 = N->getOperand(0);
9049   EVT VT = N->getValueType(0);
9050 
9051   // fold (abs c1) -> c2
9052   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9053     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
9054   // fold (abs (abs x)) -> (abs x)
9055   if (N0.getOpcode() == ISD::ABS)
9056     return N0;
9057   // fold (abs x) -> x iff not-negative
9058   if (DAG.SignBitIsZero(N0))
9059     return N0;
9060   return SDValue();
9061 }
9062 
9063 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
9064   SDValue N0 = N->getOperand(0);
9065   EVT VT = N->getValueType(0);
9066 
9067   // fold (bswap c1) -> c2
9068   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9069     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
9070   // fold (bswap (bswap x)) -> x
9071   if (N0.getOpcode() == ISD::BSWAP)
9072     return N0->getOperand(0);
9073   return SDValue();
9074 }
9075 
9076 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
9077   SDValue N0 = N->getOperand(0);
9078   EVT VT = N->getValueType(0);
9079 
9080   // fold (bitreverse c1) -> c2
9081   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9082     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
9083   // fold (bitreverse (bitreverse x)) -> x
9084   if (N0.getOpcode() == ISD::BITREVERSE)
9085     return N0.getOperand(0);
9086   return SDValue();
9087 }
9088 
9089 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
9090   SDValue N0 = N->getOperand(0);
9091   EVT VT = N->getValueType(0);
9092 
9093   // fold (ctlz c1) -> c2
9094   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9095     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
9096 
9097   // If the value is known never to be zero, switch to the undef version.
9098   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
9099     if (DAG.isKnownNeverZero(N0))
9100       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9101   }
9102 
9103   return SDValue();
9104 }
9105 
9106 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
9107   SDValue N0 = N->getOperand(0);
9108   EVT VT = N->getValueType(0);
9109 
9110   // fold (ctlz_zero_undef c1) -> c2
9111   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9112     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9113   return SDValue();
9114 }
9115 
9116 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
9117   SDValue N0 = N->getOperand(0);
9118   EVT VT = N->getValueType(0);
9119 
9120   // fold (cttz c1) -> c2
9121   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9122     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
9123 
9124   // If the value is known never to be zero, switch to the undef version.
9125   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
9126     if (DAG.isKnownNeverZero(N0))
9127       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9128   }
9129 
9130   return SDValue();
9131 }
9132 
9133 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
9134   SDValue N0 = N->getOperand(0);
9135   EVT VT = N->getValueType(0);
9136 
9137   // fold (cttz_zero_undef c1) -> c2
9138   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9139     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
9140   return SDValue();
9141 }
9142 
9143 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
9144   SDValue N0 = N->getOperand(0);
9145   EVT VT = N->getValueType(0);
9146 
9147   // fold (ctpop c1) -> c2
9148   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9149     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
9150   return SDValue();
9151 }
9152 
9153 // FIXME: This should be checking for no signed zeros on individual operands, as
9154 // well as no nans.
9155 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
9156                                          SDValue RHS,
9157                                          const TargetLowering &TLI) {
9158   const TargetOptions &Options = DAG.getTarget().Options;
9159   EVT VT = LHS.getValueType();
9160 
9161   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
9162          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
9163          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
9164 }
9165 
9166 /// Generate Min/Max node
9167 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
9168                                    SDValue RHS, SDValue True, SDValue False,
9169                                    ISD::CondCode CC, const TargetLowering &TLI,
9170                                    SelectionDAG &DAG) {
9171   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
9172     return SDValue();
9173 
9174   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9175   switch (CC) {
9176   case ISD::SETOLT:
9177   case ISD::SETOLE:
9178   case ISD::SETLT:
9179   case ISD::SETLE:
9180   case ISD::SETULT:
9181   case ISD::SETULE: {
    // Since the operands are already known never to be NaN here, either
    // fminnum or fminnum_ieee is OK. Try the ieee version first, since
    // fminnum is expanded in terms of it.
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  case ISD::SETOGT:
  case ISD::SETOGE:
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGT:
  case ISD::SETUGE: {
    unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
    if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
      return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);

    unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
    if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
      return DAG.getNode(Opcode, DL, VT, LHS, RHS);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

/// If a (v)select has a condition value that is a sign-bit test, try to smear
/// the condition operand sign-bit across the value width and use it as a mask.
static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);
  SDValue C1 = N->getOperand(1);
  SDValue C2 = N->getOperand(2);
  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
         "Expected select-of-constants");

  EVT VT = N->getValueType(0);
  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
      VT != Cond.getOperand(0).getValueType())
    return SDValue();

  // The inverted-condition + commuted-select variants of these patterns are
  // canonicalized to these forms in IR.
  SDValue X = Cond.getOperand(0);
  SDValue CondC = Cond.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
      isAllOnesOrAllOnesSplat(C2)) {
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    SDLoc DL(N);
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
  }
  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
    SDLoc DL(N);
    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
  }
  return SDValue();
}

SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);

  if (!VT.isInteger())
    return SDValue();

  auto *C1 = dyn_cast<ConstantSDNode>(N1);
  auto *C2 = dyn_cast<ConstantSDNode>(N2);
  if (!C1 || !C2)
    return SDValue();

  // Only do this before legalization to avoid conflicting with target-specific
  // transforms in the other direction (create a select from a zext/sext). There
  // is also a target-independent combine here in DAGCombiner in the other
  // direction for (select Cond, -1, 0) when the condition is not i1.
  if (CondVT == MVT::i1 && !LegalOperations) {
    if (C1->isNullValue() && C2->isOne()) {
      // select Cond, 0, 1 --> zext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isNullValue() && C2->isAllOnesValue()) {
      // select Cond, 0, -1 --> sext (!Cond)
      SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
      if (VT != MVT::i1)
        NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
      return NotCond;
    }
    if (C1->isOne() && C2->isNullValue()) {
      // select Cond, 1, 0 --> zext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
      return Cond;
    }
    if (C1->isAllOnesValue() && C2->isNullValue()) {
      // select Cond, -1, 0 --> sext (Cond)
      if (VT != MVT::i1)
        Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
      return Cond;
    }

    // Use a target hook because some targets may prefer to transform in the
    // other direction.
    if (TLI.convertSelectOfConstantsToMath(VT)) {
      // For any constants that differ by 1, we can transform the select into an
      // extend and add.
      const APInt &C1Val = C1->getAPIntValue();
      const APInt &C2Val = C2->getAPIntValue();
      if (C1Val - 1 == C2Val) {
        // select Cond, C1, C1-1 --> add (zext Cond), C1-1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }
      if (C1Val + 1 == C2Val) {
        // select Cond, C1, C1+1 --> add (sext Cond), C1+1
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
        return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
      }

      // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
      if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
        if (VT != MVT::i1)
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
        SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
      }

      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
        return V;
    }

    return SDValue();
  }

  // fold (select Cond, 0, 1) -> (xor Cond, 1)
  // We can't do this reliably if integer-based booleans have different contents
  // than floating-point-based booleans. This is because we can't tell whether
  // we have an integer-based boolean or a floating-point-based boolean unless
  // we can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if Cond is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (CondVT.isInteger() &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      C1->isNullValue() && C2->isOne()) {
    SDValue NotCond =
        DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
    if (VT.bitsEq(CondVT))
      return NotCond;
    return DAG.getZExtOrTrunc(NotCond, DL, VT);
  }

  return SDValue();
}

static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
  assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
         "Expected a (v)select");
  SDValue Cond = N->getOperand(0);
  SDValue T = N->getOperand(1), F = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
    return SDValue();

  // select Cond, Cond, F --> or Cond, F
  // select Cond, 1, F    --> or Cond, F
  if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);

  // select Cond, T, Cond --> and Cond, T
  // select Cond, T, 0    --> and Cond, T
  if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);

  // select Cond, T, 1 --> or (not Cond), T
  if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
  }

  // select Cond, 0, F --> and (not Cond), F
  if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
    SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();
  SDLoc DL(N);
  SDNodeFlags Flags = N->getFlags();

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  if (SDValue V = foldSelectOfConstants(N))
    return V;

  if (SDValue V = foldBoolSelectToLogic(N, DAG))
    return V;

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0); // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right side if the inner select already exists in the DAG, and we
    // always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence =
        TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect =
          DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
                           InnerSelect, N2, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
                                        Cond1, N1, N2, Flags);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
                           InnerSelect, Flags);
      // Cleanup on failure.
      if (InnerSelect.use_empty())
        recursivelyDeleteUnusedNodes(InnerSelect.getNode());
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
                             N2, Flags);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
                             N2, Flags);
        }
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
                             N2_2, Flags);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
                             N2_2, Flags);
      }
    }
  }

  // select (not Cond), N1, N2 -> select Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
    SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
    SelectOp->setFlags(Flags);
    return SelectOp;
  }

  // Fold selects based on a setcc into other things, such as min/max/abs.
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

    // select (fcmp lt x, y), x, y -> fminnum x, y
    // select (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
      if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
                                                CC, TLI, DAG))
        return FMinMax;

    // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
    // This is conservatively limited to pre-legal-operations to give targets
    // a chance to reverse the transform if they want to do that. Also, it is
    // unlikely that the pattern would be formed late, so it's probably not
    // worth going through the other checks.
    if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
        CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
        N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
      auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
      auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
      if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
        // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
        // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
        //
        // The IR equivalent of this transform would have this form:
        //   %a = add %x, C
        //   %c = icmp ugt %x, ~C
        //   %r = select %c, -1, %a
        //   =>
        //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
        //   %u0 = extractvalue %u, 0
        //   %u1 = extractvalue %u, 1
        //   %r = select %u1, -1, %u0
        SDVTList VTs = DAG.getVTList(VT, VT0);
        SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
        return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
      }
    }

    if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
        (!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as they
      // migrated from fcmp
      Flags = N0.getNode()->getFlags();
      SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
                                       N2, N0.getOperand(2));
      SelectNode->setFlags(Flags);
      return SelectNode;
    }

    return SimplifySelect(DL, N0, N1, N2);
  }

  return SDValue();
}

// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
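// For example (illustrative), with a condition whose bottom half is all ones
// and whose top half is all zeros:
//   vselect <1,1,0,0>, (concat_vectors A, B), (concat_vectors C, D)
//     --> concat_vectors A, D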
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = N->getValueType(0);
  int NumElems = VT.getVectorNumElements();
  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
         RHS.getOpcode() == ISD::CONCAT_VECTORS &&
         Cond.getOpcode() == ISD::BUILD_VECTOR);

  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
  // about binary ones here.
  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
    return SDValue();

  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF. After we find a
  // non-UNDEF element, keep looping until we get to half the length of the BV
  // and check that all the non-undef elements are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->isUndef())
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  return DAG.getNode(
      ISD::CONCAT_VECTORS, DL, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}

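// If the base pointer of a masked gather/scatter is a null constant and the
// index is an add whose LHS is a splat, fold the splatted value into the base
// pointer. For example (illustrative):
//   base = null, index = (add (splat %p), %offsets)
//     --> base = %p, index = %offsets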
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
  if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
    return false;

  // For now we check only the LHS of the add.
  SDValue LHS = Index.getOperand(0);
  SDValue SplatVal = DAG.getSplatValue(LHS);
  if (!SplatVal)
    return false;

  BasePtr = SplatVal;
  Index = Index.getOperand(1);
  return true;
}

// Fold sext/zext of index into index type.
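// For example (illustrative): a gather/scatter index of (zext %idx) can be
// replaced by %idx directly, with the index type updated to an UNSIGNED_*
// variant, when the target reports the extend as removable via
// shouldRemoveExtendFromGSIndex().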
bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
                     bool Scaled, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (Index.getOpcode() == ISD::ZERO_EXTEND) {
    SDValue Op = Index.getOperand(0);
    MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
      Index = Op;
      return true;
    }
  }

  if (Index.getOpcode() == ISD::SIGN_EXTEND) {
    SDValue Op = Index.getOperand(0);
    MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
    if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
      Index = Op;
      return true;
    }
  }

  return false;
}

SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
  SDValue Mask = MSC->getMask();
  SDValue Chain = MSC->getChain();
  SDValue Index = MSC->getIndex();
  SDValue Scale = MSC->getScale();
  SDValue StoreVal = MSC->getValue();
  SDValue BasePtr = MSC->getBasePtr();
  SDLoc DL(N);

  // Zap scatters with a zero mask.
  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
    return Chain;

  if (refineUniformBase(BasePtr, Index, DAG)) {
    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedScatter(
        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
  }

  if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
    SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedScatter(
        DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
        MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
  }

  return SDValue();
}

SDValue DAGCombiner::visitMSTORE(SDNode *N) {
  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Mask = MST->getMask();
  SDValue Chain = MST->getChain();
  SDLoc DL(N);

  // Zap masked stores with a zero mask.
  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
    return Chain;

  // If this is a masked store with an all-ones mask, we can use an unmasked
  // store.
  // FIXME: Can we do this for indexed, compressing, or truncating stores?
  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
      MST->isUnindexed() && !MST->isCompressingStore() &&
      !MST->isTruncatingStore())
    return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
                        MST->getBasePtr(), MST->getMemOperand());

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

SDValue DAGCombiner::visitMGATHER(SDNode *N) {
  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
  SDValue Mask = MGT->getMask();
  SDValue Chain = MGT->getChain();
  SDValue Index = MGT->getIndex();
  SDValue Scale = MGT->getScale();
  SDValue PassThru = MGT->getPassThru();
  SDValue BasePtr = MGT->getBasePtr();
  SDLoc DL(N);

  // Zap gathers with a zero mask.
  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
    return CombineTo(N, PassThru, MGT->getChain());

  if (refineUniformBase(BasePtr, Index, DAG)) {
    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
                               PassThru.getValueType(), DL, Ops,
                               MGT->getMemOperand(), MGT->getIndexType(),
                               MGT->getExtensionType());
  }

  if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
    SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
    return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
                               PassThru.getValueType(), DL, Ops,
                               MGT->getMemOperand(), MGT->getIndexType(),
                               MGT->getExtensionType());
  }

  return SDValue();
}

SDValue DAGCombiner::visitMLOAD(SDNode *N) {
  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
  SDValue Mask = MLD->getMask();
  SDLoc DL(N);

  // Zap masked loads with a zero mask.
  if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
    return CombineTo(N, MLD->getPassThru(), MLD->getChain());

  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
  // FIXME: Can we do this for indexed, expanding, or extending loads?
  if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
      MLD->isUnindexed() && !MLD->isExpandingLoad() &&
      MLD->getExtensionType() == ISD::NON_EXTLOAD) {
    SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
                                MLD->getBasePtr(), MLD->getMemOperand());
    return CombineTo(N, NewLd, NewLd.getValue(1));
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

/// A vector select of 2 constant vectors can be simplified to math/logic to
/// avoid a variable select instruction and possibly avoid constant loads.
SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
  SDValue Cond = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
      !TLI.convertSelectOfConstantsToMath(VT) ||
      !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
      !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
    return SDValue();

  // Check if we can use the condition value to increment/decrement a single
  // constant value. This simplifies a select to an add and removes a constant
  // load/materialization from the general case.
  bool AllAddOne = true;
  bool AllSubOne = true;
  unsigned Elts = VT.getVectorNumElements();
  for (unsigned i = 0; i != Elts; ++i) {
    SDValue N1Elt = N1.getOperand(i);
    SDValue N2Elt = N2.getOperand(i);
    if (N1Elt.isUndef() || N2Elt.isUndef())
      continue;
    if (N1Elt.getValueType() != N2Elt.getValueType())
      continue;

    const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
    const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
    if (C1 != C2 + 1)
      AllAddOne = false;
    if (C1 != C2 - 1)
      AllSubOne = false;
  }

  // Further simplifications for the extra-special cases where the constants are
  // all 0 or all -1 should be implemented as folds of these patterns.
  SDLoc DL(N);
  if (AllAddOne || AllSubOne) {
    // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
    // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
    auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
    SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
    return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
  }

  // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
  APInt Pow2C;
  if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
      isNullOrNullSplat(N2)) {
    SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
    SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
    return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
  }

  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
    return V;

  // The general case for select-of-constants:
  // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
  // ...but that only makes sense if a vselect is slower than 2 logic ops, so
  // leave that to a machine-specific pass.
  return SDValue();
}

SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue V = DAG.simplifySelect(N0, N1, N2))
    return V;

  if (SDValue V = foldBoolSelectToLogic(N, DAG))
    return V;

  // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
  if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
    return DAG.getSelect(DL, VT, F, N2, N1);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
        return DAG.getNode(ISD::ABS, DL, VT, LHS);

      SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
                                  DAG.getConstant(VT.getScalarSizeInBits() - 1,
                                                  DL, getShiftAmountTy(VT)));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }

    // vselect (fcmp lt x, y), x, y -> fminnum x, y
    // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //
    if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
      if (SDValue FMinMax =
              combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
        return FMinMax;
    }

    // If this select has a condition (setcc) with narrower operands than the
    // select, try to widen the compare to match the select width.
    // TODO: This should be extended to handle any constant.
    // TODO: This could be extended to handle non-loading patterns, but that
    //       requires thorough testing to avoid regressions.
    if (isNullOrNullSplat(RHS)) {
      EVT NarrowVT = LHS.getValueType();
      EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
      EVT SetCCVT = getSetCCResultType(LHS.getValueType());
      unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
      unsigned WideWidth = WideVT.getScalarSizeInBits();
      bool IsSigned = isSignedIntSetCC(CC);
      auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
          SetCCWidth != 1 && SetCCWidth < WideWidth &&
          TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
          TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
        // Both compare operands can be widened for free. The LHS can use an
        // extended load, and the RHS is a constant:
        //   vselect (ext (setcc load(X), C)), N1, N2 -->
        //   vselect (setcc extload(X), C'), N1, N2
        auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
        SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
        EVT WideSetCCVT = getSetCCResultType(WideVT);
        SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
        return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
      }
    }

    // Match VSELECTs into add with unsigned saturation.
    if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits set.
      // If it's on the left side, invert the predicate to simplify logic below.
      SDValue Other;
      ISD::CondCode SatCC = CC;
      if (ISD::isBuildVectorAllOnes(N1.getNode())) {
        Other = N2;
        SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
      } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
        Other = N1;
      }

      if (Other && Other.getOpcode() == ISD::ADD) {
        SDValue CondLHS = LHS, CondRHS = RHS;
        SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

        // Canonicalize condition operands.
        if (SatCC == ISD::SETUGE) {
          std::swap(CondLHS, CondRHS);
          SatCC = ISD::SETULE;
        }

        // We can test against either of the addition operands.
        // x <= x+y ? x+y : ~0 --> uaddsat x, y
        // x+y >= x ? x+y : ~0 --> uaddsat x, y
        if (SatCC == ISD::SETULE && Other == CondRHS &&
            (OpLHS == CondLHS || OpRHS == CondLHS))
          return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);

        if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
            CondLHS == OpLHS) {
          // If the RHS is a constant we have to reverse the const
          // canonicalization.
          // x >= ~C ? x+C : ~0 --> uaddsat x, C
          auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
            return Cond->getAPIntValue() == ~Op->getAPIntValue();
          };
          if (SatCC == ISD::SETULE &&
              ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
            return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
        }
      }
    }

    // Match VSELECTs into sub with unsigned saturation.
    if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side, invert the predicate to simplify logic below.
      SDValue Other;
      ISD::CondCode SatCC = CC;
      if (ISD::isBuildVectorAllZeros(N1.getNode())) {
        Other = N2;
        SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
      } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
        Other = N1;
      }

      if (Other && Other.getNumOperands() == 2) {
        SDValue CondRHS = RHS;
        SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);

        if (Other.getOpcode() == ISD::SUB &&
            LHS.getOpcode() == ISD::ZERO_EXTEND && LHS.getOperand(0) == OpLHS &&
            OpRHS.getOpcode() == ISD::TRUNCATE && OpRHS.getOperand(0) == RHS) {
          // Look for a general sub with unsigned saturation first.
          // zext(x) >= y ? x - trunc(y) : 0
          // --> usubsat(x,trunc(umin(y,SatLimit)))
          // zext(x) >  y ? x - trunc(y) : 0
          // --> usubsat(x,trunc(umin(y,SatLimit)))
          if (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)
            return getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS, DAG,
                                       DL);
        }

        if (OpLHS == LHS) {
          // Look for a general sub with unsigned saturation first.
          // x >= y ? x-y : 0 --> usubsat x, y
          // x >  y ? x-y : 0 --> usubsat x, y
          if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
              Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
            return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);

          if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
            if (isa<BuildVectorSDNode>(CondRHS)) {
              // If the RHS is a constant we have to reverse the const
              // canonicalization.
              // x > C-1 ? x+(-C) : 0 --> usubsat x, C
              auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
                return (!Op && !Cond) ||
                       (Op && Cond &&
                        Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
              };
              if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
                  ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
                                            /*AllowUndefs*/ true)) {
                OpRHS = DAG.getNode(ISD::SUB, DL, VT,
                                    DAG.getConstant(0, DL, VT), OpRHS);
                return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
              }

              // Another special case: If C was a sign bit, the sub has been
              // canonicalized into a xor.
              // FIXME: Would it be better to use computeKnownBits to determine
              //        whether it's safe to decanonicalize the xor?
              // x s< 0 ? x^C : 0 --> usubsat x, C
              if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
                if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
                    ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
                    OpRHSConst->getAPIntValue().isSignMask()) {
                  // Note that we have to rebuild the RHS constant here to
                  // ensure we don't rely on particular values of undef lanes.
                  OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
                  return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
                }
              }
            }
          }
        }
      }
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // Fold (vselect all_ones, N1, N2) -> N1
  if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect all_zeros, N1, N2) -> N2
  if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function assumes both the above
  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  if (SDValue V = foldVSelectOfConstants(N))
    return V;

  return SDValue();
}

SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation; no setcc node is created in this case.
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      SDValue SelectOp = DAG.getNode(
          ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
          SCC.getOperand(1), N2, N3, SCC.getOperand(2));
      SelectOp->setFlags(SCC->getFlags());
      return SelectOp;
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}

SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
  bool PreferSetCC =
      N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;

  SDValue Combined = SimplifySetCC(
      N->getValueType(0), N->getOperand(0), N->getOperand(1),
      cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);

  if (!Combined)
    return SDValue();

  // If we prefer to have a setcc, and we don't, we'll try our best to
  // recreate one using rebuildSetCC.
  if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
    SDValue NewSetCC = rebuildSetCC(Combined);

    // We don't have anything interesting to combine to.
    if (NewSetCC.getNode() == N)
      return SDValue();

    if (NewSetCC)
      return NewSetCC;
  }

  return Combined;
}

SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Carry = N->getOperand(2);
  SDValue Cond = N->getOperand(3);

  // If Carry is false, fold to a regular SETCC.
  if (isNullConstant(Carry))
    return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);

  return SDValue();
}

/// Check if N satisfies:
///   N is used once.
///   N is a Load.
///   The load is compatible with ExtOpcode, meaning that if the load has an
///     explicit zero/sign extension, ExtOpcode must be the matching extension;
///     otherwise any extension opcode is compatible.
static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
  if (!N.hasOneUse())
    return false;

  if (!isa<LoadSDNode>(N))
    return false;

  LoadSDNode *Load = cast<LoadSDNode>(N);
  ISD::LoadExtType LoadExt = Load->getExtensionType();
  if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
    return true;

  // Now LoadExt is either SEXTLOAD or ZEXTLOAD; ExtOpcode must be the matching
  // extension.
  if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
      (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
    return false;

  return true;
}

/// Fold
///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
          Opcode == ISD::ANY_EXTEND) &&
         "Expected EXTEND dag node in input!");

  if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
      !N0.hasOneUse())
    return SDValue();

  SDValue Op1 = N0->getOperand(1);
  SDValue Op2 = N0->getOperand(2);
  if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
    return SDValue();

  auto ExtLoadOpcode = ISD::EXTLOAD;
  if (Opcode == ISD::SIGN_EXTEND)
    ExtLoadOpcode = ISD::SEXTLOAD;
  else if (Opcode == ISD::ZERO_EXTEND)
    ExtLoadOpcode = ISD::ZEXTLOAD;

  LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
  LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
  if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
      !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
    return SDValue();

  SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
  SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
  return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
}

/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, DL, VT, N0);

  // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
  // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
  if (N0->getOpcode() == ISD::SELECT) {
    SDValue Op1 = N0->getOperand(1);
    SDValue Op2 = N0->getOperand(2);
    if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
        (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
      //
      // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
      // t2: i64 = any_extend t1
      // -->
      // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
      // -->
      // t4: i64 = sign_extend_inreg t3
      unsigned FoldOpc = Opcode;
      if (FoldOpc == ISD::ANY_EXTEND)
        FoldOpc = ISD::SIGN_EXTEND;
      return DAG.getSelect(DL, VT, N0->getOperand(0),
                           DAG.getNode(FoldOpc, DL, VT, Op1),
                           DAG.getNode(FoldOpc, DL, VT, Op2));
    }
  }

  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return SDValue();

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();

  // For zero-extensions, UNDEF elements are still guaranteed to have their
  // upper bits set to zero.
  bool IsZext =
      Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;

  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue Op = N0.getOperand(i);
    if (Op.isUndef()) {
      Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
      continue;
    }

    SDLoc DL(Op);
    // Get the constant value and, if needed, truncate it to the size of the
    // type. Nodes like build_vector might have constants wider than the
    // scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts);
}

// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
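// For example (illustrative): if (load x) also feeds (setcc (load x), c, cc),
// that setcc is collected in ExtendNodes and later rewritten to compare the
// extended load against the extended constant, so the unextended value need
// not stay live alongside the extended one.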
static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVectorImpl<SDNode *> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      return !ExtendNodes.empty();
10384   }
10385   return true;
10386 }
10387 
10388 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10389                                   SDValue OrigLoad, SDValue ExtLoad,
10390                                   ISD::NodeType ExtType) {
10391   // Extend SetCC uses if necessary.
10392   SDLoc DL(ExtLoad);
10393   for (SDNode *SetCC : SetCCs) {
10394     SmallVector<SDValue, 4> Ops;
10395 
10396     for (unsigned j = 0; j != 2; ++j) {
10397       SDValue SOp = SetCC->getOperand(j);
10398       if (SOp == OrigLoad)
10399         Ops.push_back(ExtLoad);
10400       else
10401         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10402     }
10403 
10404     Ops.push_back(SetCC->getOperand(2));
10405     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10406   }
10407 }
10408 
10409 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10410 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10411   SDValue N0 = N->getOperand(0);
10412   EVT DstVT = N->getValueType(0);
10413   EVT SrcVT = N0.getValueType();
10414 
10415   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10416           N->getOpcode() == ISD::ZERO_EXTEND) &&
10417          "Unexpected node type (not an extend)!");
10418 
10419   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10420   // For example, on a target with legal v4i32, but illegal v8i32, turn:
10421   //   (v8i32 (sext (v8i16 (load x))))
10422   // into:
10423   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
10424   //                          (v4i32 (sextload (x + 16)))))
10425   // Where uses of the original load, i.e.:
10426   //   (v8i16 (load x))
10427   // are replaced with:
10428   //   (v8i16 (truncate
10429   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
10430   //                            (v4i32 (sextload (x + 16)))))))
10431   //
10432   // This combine is only applicable to illegal, but splittable, vectors.
10433   // All legal types, and illegal non-vector types, are handled elsewhere.
10434   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10435   //
10436   if (N0->getOpcode() != ISD::LOAD)
10437     return SDValue();
10438 
10439   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10440 
10441   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10442       !N0.hasOneUse() || !LN0->isSimple() ||
10443       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10444       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10445     return SDValue();
10446 
10447   SmallVector<SDNode *, 4> SetCCs;
10448   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10449     return SDValue();
10450 
10451   ISD::LoadExtType ExtType =
10452       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10453 
10454   // Try to split the vector types to get down to legal types.
10455   EVT SplitSrcVT = SrcVT;
10456   EVT SplitDstVT = DstVT;
10457   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10458          SplitSrcVT.getVectorNumElements() > 1) {
10459     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10460     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10461   }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const Align Align = commonAlignment(LN0->getAlign(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
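// An illustrative sketch (types chosen arbitrarily): on a target with a legal
// i16 -> i32 zextload, this rewrites
//   (i32 (zext (i16 (and (i16 (srl (i16 (load x)), 4)), 15))))
// into
//   (i32 (and (i32 (srl (i32 (zextload x)), 4)), 15))
// so the shift and logic op execute directly in the wider type.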
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
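  // (An SRL of a zextload leaves the high bits zero, so and/or/xor all
  // commute with the zero extension. An SHL can move bits above OrigVT's
  // width; the original computation truncates those bits away, so only an
  // AND, whose zero-extended mask clears them again, preserves the result.)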
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

/// If we're narrowing or widening the result of a vector select and the final
/// size is the same size as a setcc (compare) feeding the select, then try to
/// apply the cast operation to the select's operands because matching vector
/// sizes for a select condition and other operands should be more efficient.
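/// A sketch of the intent, with sizes chosen so the size check below
/// succeeds: for
///   (v4i32 (trunc (v4i64 (vselect (setcc (v4i32 a), b, cc), x, y))))
/// producing
///   (v4i32 (vselect (setcc a, b, cc), (trunc x), (trunc y)))
/// gives the select a condition and operands of matching vector size.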
SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
  unsigned CastOpcode = Cast->getOpcode();
  assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
          CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
          CastOpcode == ISD::FP_ROUND) &&
         "Unexpected opcode for vector select narrowing/widening");

  // We only do this transform before legal ops because the pattern may be
  // obfuscated by target-specific operations after legalization. Do not create
  // an illegal select op, however, because that may be difficult to lower.
  EVT VT = Cast->getValueType(0);
  if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  SDValue VSel = Cast->getOperand(0);
  if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
      VSel.getOperand(0).getOpcode() != ISD::SETCC)
    return SDValue();

  // Does the setcc have the same vector size as the casted select?
  SDValue SetCC = VSel.getOperand(0);
  EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
  if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
    return SDValue();

  // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
  SDValue A = VSel.getOperand(1);
  SDValue B = VSel.getOperand(2);
  SDValue CastA, CastB;
  SDLoc DL(Cast);
  if (CastOpcode == ISD::FP_ROUND) {
    // FP_ROUND (fptrunc) has an extra flag operand to pass along.
    CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
    CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
  } else {
    CastA = DAG.getNode(CastOpcode, DL, VT, A);
    CastB = DAG.getNode(CastOpcode, DL, VT, B);
  }
  return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
}

// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
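// Sketch: (i64 (sext (i32 (sextload<i16> x)))) can become a single
// (i64 (sextload<i16> x)) when the target reports that extending load as
// legal, which is what the isLoadExtLegal query below checks.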
static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
                                     const TargetLowering &TLI, EVT VT,
                                     bool LegalOperations, SDNode *N,
                                     SDValue N0, ISD::LoadExtType ExtLoadType) {
  SDNode *N0Node = N0.getNode();
  bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
                                                   : ISD::isZEXTLoad(N0Node);
  if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
      !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || !LN0->isSimple() || VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
    return SDValue();

  SDValue ExtLoad =
      DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                     LN0->getBasePtr(), MemVT, LN0->getMemOperand());
  Combiner.CombineTo(N, ExtLoad);
  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
  if (LN0->use_empty())
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
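// Sketch: (i32 (sext (i16 (load x)))) becomes (i32 (sextload<i16> x));
// any other users of the original load are rewritten to use a truncate of
// the new extending load's value.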
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        !cast<LoadSDNode>(N0)->isSimple()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}

static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
                                        const TargetLowering &TLI, EVT VT,
                                        SDNode *N, SDValue N0,
                                        ISD::LoadExtType ExtLoadType,
                                        ISD::NodeType ExtOpc) {
  if (!N0.hasOneUse())
    return SDValue();

  MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
  if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
    return SDValue();

  if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
    return SDValue();

  if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  SDLoc dl(Ld);
  SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
  SDValue NewLoad = DAG.getMaskedLoad(
      VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
      PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
      ExtLoadType, Ld->isExpandingLoad());
  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
  return NewLoad;
}

static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
                                       bool LegalOperations) {
  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");

  SDValue SetCC = N->getOperand(0);
  if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
      !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
    return SDValue();

  SDValue X = SetCC.getOperand(0);
  SDValue Ones = SetCC.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
  EVT VT = N->getValueType(0);
  EVT XVT = X.getValueType();
  // setge X, C is canonicalized to setgt, so we do not need to match that
  // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
  // not require the 'not' op.
  if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
    // Invert and smear/shift the sign bit:
    // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
    // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
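    // For instance, with iN == i32 (an illustrative choice):
    //   sext i1 (setgt i32 X, -1) --> sra (not X), 31
    // because X > -1 means exactly "sign bit clear", and (not X) moves the
    // inverted sign bit into position before it is smeared/shifted down.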
    SDLoc DL(N);
    unsigned ShCt = VT.getSizeInBits() - 1;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
      SDValue NotX = DAG.getNOT(DL, X, VT);
      SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
      auto ShiftOpcode =
        N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
      return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
    }
  }
  return SDValue();
}

SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already sign extended and can be used directly.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
                               ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N00.getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  // fold sext (not i1 X) -> add (zext i1 X), -1
  // TODO: This could be extended to handle bool vectors.
  if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
      (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
                            TLI.isOperationLegal(ISD::ADD, VT)))) {
    // If we can eliminate the 'not', the sext form should be better.
    if (SDValue NewXor = visitXOR(N0.getNode())) {
      // Returning N0 is a form of in-visit replacement that may have
      // invalidated N0.
      if (NewXor.getNode() == N0.getNode()) {
        // Return SDValue here as the xor should have already been replaced in
        // this sext.
        return SDValue();
      } else {
        // Return a new sext with the new xor.
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
      }
    }

    SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value (or a setcc that
// behaves like one), return true, record the value being truncated in Op,
// and record which of Op's bits are zero/one in Known. This function computes
// KnownBits to avoid a duplicated call to computeKnownBits in the caller.
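// A setcc can act as a truncate here: (setne X, 0), where all bits of X
// above bit 0 are known zero, yields the same i1 value as (trunc X), which
// is the case the body below recognizes.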
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         KnownBits &Known) {
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    Known = DAG.computeKnownBits(Op);
    return true;
  }

  if (N.getOpcode() != ISD::SETCC ||
      N.getValueType().getScalarType() != MVT::i1 ||
      cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  if (isNullOrNullSplat(Op0))
    Op = Op1;
  else if (isNullOrNullSplat(Op1))
    Op = Op0;
  else
    return false;

  Known = DAG.computeKnownBits(Op);

  return (Known.Zero | 1).isAllOnesValue();
}

/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
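/// For example, if i16 CTPOP is not supported but i32 CTPOP is:
///   (i32 (zext (i16 (ctpop X)))) --> (ctpop (i32 (zext X)))
/// This is safe because zero-extending X adds no set bits to count.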
static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
  assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
          Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");

  SDValue CtPop = Extend->getOperand(0);
  if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
    return SDValue();

  EVT VT = Extend->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
      !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
    return SDValue();

  // zext (ctpop X) --> ctpop (zext X)
  SDLoc DL(Extend);
  SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
  return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
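  // Sketch: if x is i32 with its top 24 bits known zero, then
  //   (i32 (zext (i8 (trunc x))))
  // is just x, since the truncation only discards bits already known zero.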
  SDValue Op;
  KnownBits Known;
  if (isTruncateOf(DAG, N0, Op, Known)) {
    APInt TruncatedBits =
      (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
      APInt(Op.getScalarValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getScalarValueSizeInBits(),
                        N0.getScalarValueSizeInBits(),
                        std::min(Op.getScalarValueSizeInBits(),
                                 VT.getScalarSizeInBits()));
    if (TruncatedBits.isSubsetOf(Known.Zero))
      return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
  }

  // fold (zext (truncate x)) -> (and x, mask)
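  // e.g. (i32 (zext (i8 (trunc x)))) -> (i32 (and x, 255)) for an i32 x.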
  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }

    EVT SrcVT = N0.getOperand(0).getValueType();
    EVT MinVT = N0.getValueType();

    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
    if (SrcVT.bitsLT(VT) && VT.isVector()) {
      if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
                               TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
        SDValue Op = N0.getOperand(0);
        Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
        AddToWorklist(Op.getNode());
        SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
        // Transfer the debug info; the new node is equivalent to N0.
        DAG.transferDbgValues(N0, ZExtOrTrunc);
        return ZExtOrTrunc;
      }
    }

    if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
      SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
      AddToWorklist(Op.getNode());
      SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
      // We may safely transfer the debug info describing the truncate node
      // over to the equivalent and operation.
      DAG.transferDbgValues(N0, And);
      return And;
    }
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
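  // Sketch: (i64 (zext (i32 (and (i32 (trunc (i64 x))), 7)))) can become
  // (i64 (and x, 7)), trading two casts for a single wide AND when either
  // cast would cost an instruction.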
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // Try to simplify (zext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
    return foldedExt;

  if (SDValue foldedExt =
      tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
                               ISD::ZERO_EXTEND))
    return foldedExt;

  // fold (zext (load x)) to multiple smaller zextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // Unless (and (load x) cst) will match as a zextload already and has
  // additional users.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse()) {
        if (N0.getOpcode() == ISD::AND) {
          auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
          EVT LoadResultTy = AndC->getValueType(0);
          EVT ExtVT;
          if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
            DoXform = false;
        }
      }
      if (DoXform)
        DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                          ISD::ZERO_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
        SDLoc DL(N);
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
  //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
    return ZExtLoad;

  // Try to simplify (zext (zextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
    return foldedExt;

  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    // Only do this before legalize for now.
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the #
      // elements of the compare (and the # elements of the compare result for
      // that matter). Check to see that they are the same size. If so, we know
      // that the element size of the extended result matches the element size
      // of the compare operands.
      SDLoc DL(N);
      if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
        // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
        SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
                                     N0.getOperand(1), N0.getOperand(2));
        return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
      }

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend followed by zext_in_reg.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1), N0.getOperand(2));
      return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
                                    N0.getValueType());
    }

    // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
    SDLoc DL(N);
    EVT N0VT = N0.getValueType();
    EVT N00VT = N0.getOperand(0).getValueType();
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1),
            DAG.getBoolConstant(true, DL, N0VT, N00VT),
            DAG.getBoolConstant(false, DL, N0VT, N00VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
  }

  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
        InnerZExt.getOperand(0).getValueSizeInBits();
      if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDLoc DL(N);
    SDValue X = N0.getOperand(0).getOperand(0);
    X = DAG.getAnyExtOrTrunc(X, DL, VT);
    APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction, so attempt to fold to zext instead.
  if (VT.isVector()) {
    // Try to simplify (zext (load x)).
    if (SDValue foldedExt =
            tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                               ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
      return foldedExt;
  } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
             ISD::isUNINDEXEDLoad(N0.getNode()) &&
             TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode *, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform =
          ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       N0.getValueType(), LN0->getMemOperand());
      ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it via CombineTo N.
      bool NoReplaceTrunc = N0.hasOneUse();
      CombineTo(N, ExtLoad);
      if (NoReplaceTrunc) {
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
        recursivelyDeleteUnusedNodes(LN0);
      } else {
        SDValue Trunc =
            DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
        CombineTo(LN0, Trunc, ExtLoad.getValue(1));
      }
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
      recursivelyDeleteUnusedNodes(LN0);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N00VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N00VT) == N0.getValueType())
        return SDValue();

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the extended result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend.
      EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                      N0.getOperand(1),
                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  if (SDValue NewCtPop = widenCtPop(N, DAG))
    return NewCtPop;

  if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
    return Res;

  return SDValue();
}

SDValue DAGCombiner::visitAssertExt(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT AssertVT = cast<VTSDNode>(N1)->getVT();

  // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
  if (N0.getOpcode() == Opcode &&
      AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
    return N0;

  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting the smaller of the two asserted types directly on the
    // larger source type. This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    SDLoc DL(N);
    EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
    SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
    SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                    BigA.getOperand(0), MinAssertVTVal);
    return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
  }

  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::AssertSext &&
      Opcode == ISD::AssertZext) {
    SDValue BigA = N0.getOperand(0);
    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
           "Asserting zero/sign-extended bits to a type larger than the "
           "truncated destination does not provide information");

    if (AssertVT.bitsLT(BigA_AssertVT)) {
      SDLoc DL(N);
      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
                                      BigA.getOperand(0), N1);
      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
  SDLoc DL(N);

  Align AL = cast<AssertAlignSDNode>(N)->getAlign();
  SDValue N0 = N->getOperand(0);

  // Fold (assertalign (assertalign x, AL0), AL1) ->
  // (assertalign x, max(AL0, AL1))
  if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
    return DAG.getAssertAlign(DL, N0.getOperand(0),
                              std::max(AL, AAN->getAlign()));

  // In rare cases, there are trivial arithmetic ops in the source operands.
  // Sink this assert down to the source operands so that those arithmetic
  // ops can be exposed to DAG combining.
  switch (N0.getOpcode()) {
  default:
    break;
  case ISD::ADD:
  case ISD::SUB: {
    unsigned AlignShift = Log2(AL);
    SDValue LHS = N0.getOperand(0);
    SDValue RHS = N0.getOperand(1);
    unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
    unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
    if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
      if (LHSAlignShift < AlignShift)
        LHS = DAG.getAssertAlign(DL, LHS, AL);
      if (RHSAlignShift < AlignShift)
        RHS = DAG.getAssertAlign(DL, RHS, AL);
      return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
    }
    break;
  }
  }

  return SDValue();
}

/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// in the narrower type, transform it to a narrower load from address + N /
/// (number of bits in the new type). Also narrow the load if the result is
/// masked with an AND to effectively produce a smaller type. If the result is
/// to be extended, also fold the extension to form an extending load.
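/// For example, on a little-endian target,
///   (i8 (trunc (srl (i32 (load x)), 16)))
/// can become (i8 (load x+2)), since the shift selects the third byte of the
/// wider in-memory value.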
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  unsigned ShAmt = 0;
  bool HasShiftedOffset = false;
  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extending back to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half, or byte into the lowest
    // bits.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);

    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01 || !LN0)
      return SDValue();

    uint64_t ShiftAmt = N01->getZExtValue();
    uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
    else
      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                                VT.getScalarSizeInBits() - ShiftAmt);
  } else if (Opc == ISD::AND) {
    // An AND with a constant mask is the same as a truncate + zero-extend.
    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!AndC)
      return SDValue();

    const APInt &Mask = AndC->getAPIntValue();
    unsigned ActiveBits = 0;
    if (Mask.isMask()) {
      ActiveBits = Mask.countTrailingOnes();
    } else if (Mask.isShiftedMask()) {
      ShAmt = Mask.countTrailingZeros();
      APInt ShiftedMask = Mask.lshr(ShAmt);
      ActiveBits = ShiftedMask.countTrailingOnes();
      HasShiftedOffset = true;
    } else
      return SDValue();

    ExtType = ISD::ZEXTLOAD;
    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  }
11654 
11655   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11656     SDValue SRL = N0;
11657     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11658       ShAmt = ConstShift->getZExtValue();
11659       unsigned EVTBits = ExtVT.getScalarSizeInBits();
11660       // Is the shift amount a multiple of size of VT?
11661       if ((ShAmt & (EVTBits-1)) == 0) {
11662         N0 = N0.getOperand(0);
11663         // Is the load width a multiple of size of VT?
11664         if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11665           return SDValue();
11666       }
11667 
11668       // At this point, we must have a load or else we can't do the transform.
11669       auto *LN0 = dyn_cast<LoadSDNode>(N0);
11670       if (!LN0) return SDValue();
11671 
      // Because an SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
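      // For example, (srl (i16 (sextload i8)), 8) must yield zeros in its top
      // byte, while a narrowed sextload would replicate the sign bit there.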
11675       if (LN0->getExtensionType() == ISD::SEXTLOAD)
11676         return SDValue();
11677 
11678       // If the shift amount is larger than the input type then we're not
11679       // accessing any of the loaded bytes.  If the load was a zextload/extload
11680       // then the result of the shift+trunc is zero/undef (handled elsewhere).
11681       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11682         return SDValue();
11683 
11684       // If the SRL is only used by a masking AND, we may be able to adjust
11685       // the ExtVT to make the AND redundant.
11686       SDNode *Mask = *(SRL->use_begin());
11687       if (Mask->getOpcode() == ISD::AND &&
11688           isa<ConstantSDNode>(Mask->getOperand(1))) {
11689         const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11690         if (ShiftMask.isMask()) {
11691           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11692                                            ShiftMask.countTrailingOnes());
11693           // If the mask is smaller, recompute the type.
11694           if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11695               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11696             ExtVT = MaskedVT;
11697         }
11698       }
11699     }
11700   }
11701 
11702   // If the load is shifted left (and the result isn't shifted back right),
11703   // we can fold the truncate through the shift.
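  // For example, when narrowing is profitable on a little-endian target:
  //   (i16 (trunc (shl (i32 (load addr)), 4))) -> (shl (i16 (load addr)), 4)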
11704   unsigned ShLeftAmt = 0;
11705   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11706       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11707     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11708       ShLeftAmt = N01->getZExtValue();
11709       N0 = N0.getOperand(0);
11710     }
11711   }
11712 
11713   // If we haven't found a load, we can't narrow it.
11714   if (!isa<LoadSDNode>(N0))
11715     return SDValue();
11716 
11717   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11718   // Reducing the width of a volatile load is illegal.  For atomics, we may be
11719   // able to reduce the width provided we never widen again. (see D66309)
11720   if (!LN0->isSimple() ||
11721       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11722     return SDValue();
11723 
11724   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11725     unsigned LVTStoreBits =
11726         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11727     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11728     return LVTStoreBits - EVTStoreBits - ShAmt;
11729   };
11730 
11731   // For big endian targets, we need to adjust the offset to the pointer to
11732   // load the correct bytes.
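  // e.g. narrowing an i32 load to i16 with ShAmt = 0 becomes a load from byte
  // offset 2, since 32 - 16 - 0 = 16 bits = 2 bytes.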
11733   if (DAG.getDataLayout().isBigEndian())
11734     ShAmt = AdjustBigEndianShift(ShAmt);
11735 
11736   uint64_t PtrOff = ShAmt / 8;
11737   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11738   SDLoc DL(LN0);
11739   // The original load itself didn't wrap, so an offset within it doesn't.
11740   SDNodeFlags Flags;
11741   Flags.setNoUnsignedWrap(true);
11742   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11743                                             TypeSize::Fixed(PtrOff), DL, Flags);
11744   AddToWorklist(NewPtr.getNode());
11745 
11746   SDValue Load;
11747   if (ExtType == ISD::NON_EXTLOAD)
11748     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11749                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11750                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11751   else
11752     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11753                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11754                           NewAlign, LN0->getMemOperand()->getFlags(),
11755                           LN0->getAAInfo());
11756 
11757   // Replace the old load's chain with the new load's chain.
11758   WorklistRemover DeadNodes(*this);
11759   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11760 
11761   // Shift the result left, if we've swallowed a left shift.
11762   SDValue Result = Load;
11763   if (ShLeftAmt != 0) {
11764     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11765     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11766       ShImmTy = VT;
11767     // If the shift amount is as large as the result size (but, presumably,
11768     // no larger than the source) then the useful bits of the result are
11769     // zero; we can't simply return the shortened shift, because the result
11770     // of that operation is undefined.
11771     if (ShLeftAmt >= VT.getScalarSizeInBits())
11772       Result = DAG.getConstant(0, DL, VT);
11773     else
11774       Result = DAG.getNode(ISD::SHL, DL, VT,
11775                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11776   }
11777 
11778   if (HasShiftedOffset) {
    // The big-endian adjustment made above was only needed to compute the
    // pointer offset; applying the (involutive) adjustment a second time
    // recovers the original shift amount for the shl below.
11781     if (DAG.getDataLayout().isBigEndian())
11782       ShAmt = AdjustBigEndianShift(ShAmt);
11783 
    // We're using a shifted mask, so the load now has an offset. This means
    // the data has been loaded into lower bytes than it would have been
    // before, so we need to shl the loaded data into the correct position in
    // the register.
11788     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11789     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
11790     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11791   }
11792 
11793   // Return the new loaded value.
11794   return Result;
11795 }
11796 
11797 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11798   SDValue N0 = N->getOperand(0);
11799   SDValue N1 = N->getOperand(1);
11800   EVT VT = N->getValueType(0);
11801   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11802   unsigned VTBits = VT.getScalarSizeInBits();
11803   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11804 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
11806   if (N0.isUndef())
11807     return DAG.getConstant(0, SDLoc(N), VT);
11808 
11809   // fold (sext_in_reg c1) -> c1
11810   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11811     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11812 
11813   // If the input is already sign extended, just drop the extension.
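  // e.g. with VT = i32 and ExtVT = i8, 25 sign bits mean bits [31:7] are all
  // equal, so sign-extending from bit 7 changes nothing.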
11814   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11815     return N0;
11816 
  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT)
11818   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11819       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11820     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11821                        N1);
11822 
11823   // fold (sext_in_reg (sext x)) -> (sext x)
11824   // fold (sext_in_reg (aext x)) -> (sext x)
11825   // if x is small enough or if we know that x has more than 1 sign bit and the
11826   // sign_extend_inreg is extending from one of them.
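  // e.g. (sext_in_reg (any_extend (i8 x)), i8) -> (sext x), since every bit
  // below the extension point comes straight from x.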
11827   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11828     SDValue N00 = N0.getOperand(0);
11829     unsigned N00Bits = N00.getScalarValueSizeInBits();
11830     if ((N00Bits <= ExtVTBits ||
11831          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11832         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11833       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11834   }
11835 
11836   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11837   // if x is small enough or if we know that x has more than 1 sign bit and the
11838   // sign_extend_inreg is extending from one of them.
11839   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11840       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11841       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
11842     SDValue N00 = N0.getOperand(0);
11843     unsigned N00Bits = N00.getScalarValueSizeInBits();
11844     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
11845     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
11846     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
11847     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
11848     if ((N00Bits == ExtVTBits ||
11849          (!IsZext && (N00Bits < ExtVTBits ||
11850                       (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
11851                           ExtVTBits))) &&
11852         (!LegalOperations ||
11853          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
11854       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
11855   }
11856 
11857   // fold (sext_in_reg (zext x)) -> (sext x)
11858   // iff we are extending the source sign bit.
11859   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11860     SDValue N00 = N0.getOperand(0);
11861     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11862         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11864   }
11865 
11866   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11867   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11868     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11869 
11870   // fold operands of sext_in_reg based on knowledge that the top bits are not
11871   // demanded.
11872   if (SimplifyDemandedBits(SDValue(N, 0)))
11873     return SDValue(N, 0);
11874 
11875   // fold (sext_in_reg (load x)) -> (smaller sextload x)
11876   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11877   if (SDValue NarrowLoad = ReduceLoadWidth(N))
11878     return NarrowLoad;
11879 
11880   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11881   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11882   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11883   if (N0.getOpcode() == ISD::SRL) {
11884     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11885       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11886         // We can turn this into an SRA iff the input to the SRL is already sign
11887         // extended enough.
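        // e.g. for i32 (sext_in_reg (srl X, 23), i8), X needs at least two
        // sign bits: result bit 7 is X's bit 30, which must match the sign.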
11888         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11889         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11890           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11891                              N0.getOperand(1));
11892       }
11893   }
11894 
11895   // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by the target, we can only do the combine
  // when the load has one use. Doing otherwise can block folding the extload
  // with other extends that the target does support.
11899   if (ISD::isEXTLoad(N0.getNode()) &&
11900       ISD::isUNINDEXEDLoad(N0.getNode()) &&
11901       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11902       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11903         N0.hasOneUse()) ||
11904        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11905     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11906     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11907                                      LN0->getChain(),
11908                                      LN0->getBasePtr(), ExtVT,
11909                                      LN0->getMemOperand());
11910     CombineTo(N, ExtLoad);
11911     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11912     AddToWorklist(ExtLoad.getNode());
11913     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11914   }
11915   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11916   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11917       N0.hasOneUse() &&
11918       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11919       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11920        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11921     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11922     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11923                                      LN0->getChain(),
11924                                      LN0->getBasePtr(), ExtVT,
11925                                      LN0->getMemOperand());
11926     CombineTo(N, ExtLoad);
11927     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11928     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11929   }
11930 
11931   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
11932   // ignore it if the masked load is already sign extended
11933   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
11934     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
11935         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
11936         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
11937       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
11938           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
11939           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
11940           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
11941       CombineTo(N, ExtMaskedLoad);
11942       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
11943       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11944     }
11945   }
11946 
11947   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
11948   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
11949     if (SDValue(GN0, 0).hasOneUse() &&
11950         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
11952       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
11953                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
11954 
11955       SDValue ExtLoad = DAG.getMaskedGather(
11956           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
11957           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
11958 
11959       CombineTo(N, ExtLoad);
11960       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11961       AddToWorklist(ExtLoad.getNode());
11962       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11963     }
11964   }
11965 
11966   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11967   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11968     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11969                                            N0.getOperand(1), false))
11970       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11971   }
11972 
11973   return SDValue();
11974 }
11975 
11976 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11977   SDValue N0 = N->getOperand(0);
11978   EVT VT = N->getValueType(0);
11979 
  // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
11981   if (N0.isUndef())
11982     return DAG.getConstant(0, SDLoc(N), VT);
11983 
11984   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11985     return Res;
11986 
11987   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11988     return SDValue(N, 0);
11989 
11990   return SDValue();
11991 }
11992 
11993 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11994   SDValue N0 = N->getOperand(0);
11995   EVT VT = N->getValueType(0);
11996 
11997   // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11998   if (N0.isUndef())
11999     return DAG.getConstant(0, SDLoc(N), VT);
12000 
12001   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
12002     return Res;
12003 
12004   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12005     return SDValue(N, 0);
12006 
12007   return SDValue();
12008 }
12009 
12010 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
12011   SDValue N0 = N->getOperand(0);
12012   EVT VT = N->getValueType(0);
12013   EVT SrcVT = N0.getValueType();
12014   bool isLE = DAG.getDataLayout().isLittleEndian();
12015 
12016   // noop truncate
12017   if (SrcVT == VT)
12018     return N0;
12019 
12020   // fold (truncate (truncate x)) -> (truncate x)
12021   if (N0.getOpcode() == ISD::TRUNCATE)
12022     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12023 
12024   // fold (truncate c1) -> c1
12025   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
12026     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
12027     if (C.getNode() != N)
12028       return C;
12029   }
12030 
12031   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
12032   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
12033       N0.getOpcode() == ISD::SIGN_EXTEND ||
12034       N0.getOpcode() == ISD::ANY_EXTEND) {
12035     // if the source is smaller than the dest, we still need an extend.
12036     if (N0.getOperand(0).getValueType().bitsLT(VT))
12037       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
12039     if (N0.getOperand(0).getValueType().bitsGT(VT))
12040       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
12041     // if the source and dest are the same type, we can drop both the extend
12042     // and the truncate.
12043     return N0.getOperand(0);
12044   }
12045 
12046   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
12047   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
12048     return SDValue();
12049 
12050   // Fold extract-and-trunc into a narrow extract. For example:
12051   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
12052   //   i32 y = TRUNCATE(i64 x)
12053   //        -- becomes --
12054   //   v16i8 b = BITCAST (v2i64 val)
12055   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
12056   //
12057   // Note: We only run this optimization after type legalization (which often
12058   // creates this pattern) and before operation legalization after which
12059   // we need to be more careful about the vector instructions that we generate.
12060   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
12061       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
12062     EVT VecTy = N0.getOperand(0).getValueType();
12063     EVT ExTy = N0.getValueType();
12064     EVT TrTy = N->getValueType(0);
12065 
12066     auto EltCnt = VecTy.getVectorElementCount();
12067     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
12068     auto NewEltCnt = EltCnt * SizeRatio;
12069 
12070     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
12071     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
12072 
12073     SDValue EltNo = N0->getOperand(1);
12074     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
12075       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12076       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
12077 
12078       SDLoc DL(N);
12079       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
12080                          DAG.getBitcast(NVT, N0.getOperand(0)),
12081                          DAG.getVectorIdxConstant(Index, DL));
12082     }
12083   }
12084 
12085   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
12086   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
12087     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
12088         TLI.isTruncateFree(SrcVT, VT)) {
12089       SDLoc SL(N0);
12090       SDValue Cond = N0.getOperand(0);
12091       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12092       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
12093       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
12094     }
12095   }
12096 
12097   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
12098   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
12099       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
12100       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
12101     SDValue Amt = N0.getOperand(1);
12102     KnownBits Known = DAG.computeKnownBits(Amt);
12103     unsigned Size = VT.getScalarSizeInBits();
12104     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
12105       SDLoc SL(N);
12106       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
12107 
12108       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12109       if (AmtVT != Amt.getValueType()) {
12110         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
12111         AddToWorklist(Amt.getNode());
12112       }
12113       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
12114     }
12115   }
12116 
12117   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
12118     return V;
12119 
12120   // Attempt to pre-truncate BUILD_VECTOR sources.
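  // e.g. (v2i16 (trunc (build_vector (i32 a), (i32 b)))) ->
  //      (build_vector (i16 (trunc a)), (i16 (trunc b)))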
12121   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
12122       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
12123       // Avoid creating illegal types if running after type legalizer.
12124       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
12125     SDLoc DL(N);
12126     EVT SVT = VT.getScalarType();
12127     SmallVector<SDValue, 8> TruncOps;
12128     for (const SDValue &Op : N0->op_values()) {
12129       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
12130       TruncOps.push_back(TruncOp);
12131     }
12132     return DAG.getBuildVector(VT, DL, TruncOps);
12133   }
12134 
12135   // Fold a series of buildvector, bitcast, and truncate if possible.
12136   // For example fold
12137   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
12138   //   (2xi32 (buildvector x, y)).
12139   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
12140       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12141       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
12142       N0.getOperand(0).hasOneUse()) {
12143     SDValue BuildVect = N0.getOperand(0);
12144     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
12145     EVT TruncVecEltTy = VT.getVectorElementType();
12146 
12147     // Check that the element types match.
12148     if (BuildVectEltTy == TruncVecEltTy) {
12149       // Now we only need to compute the offset of the truncated elements.
12150       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
12151       unsigned TruncVecNumElts = VT.getVectorNumElements();
12152       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
12153 
12154       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
12155              "Invalid number of elements");
12156 
12157       SmallVector<SDValue, 8> Opnds;
12158       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
12159         Opnds.push_back(BuildVect.getOperand(i));
12160 
12161       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
12162     }
12163   }
12164 
12165   // See if we can simplify the input to this truncate through knowledge that
12166   // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
12168   // Currently we only perform this optimization on scalars because vectors
12169   // may have different active low bits.
12170   if (!VT.isVector()) {
12171     APInt Mask =
12172         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
12173     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
12174       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
12175   }
12176 
12177   // fold (truncate (load x)) -> (smaller load x)
12178   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
12179   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
12180     if (SDValue Reduced = ReduceLoadWidth(N))
12181       return Reduced;
12182 
12183     // Handle the case where the load remains an extending load even
12184     // after truncation.
12185     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
12186       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12187       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
12188         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
12189                                          VT, LN0->getChain(), LN0->getBasePtr(),
12190                                          LN0->getMemoryVT(),
12191                                          LN0->getMemOperand());
12192         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
12193         return NewLoad;
12194       }
12195     }
12196   }
12197 
12198   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
12199   // where ... are all 'undef'.
12200   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
12201     SmallVector<EVT, 8> VTs;
12202     SDValue V;
12203     unsigned Idx = 0;
12204     unsigned NumDefs = 0;
12205 
12206     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
12207       SDValue X = N0.getOperand(i);
12208       if (!X.isUndef()) {
12209         V = X;
12210         Idx = i;
12211         NumDefs++;
12212       }
      // Stop if more than one member is non-undef.
12214       if (NumDefs > 1)
12215         break;
12216 
12217       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
12218                                      VT.getVectorElementType(),
12219                                      X.getValueType().getVectorElementCount()));
12220     }
12221 
12222     if (NumDefs == 0)
12223       return DAG.getUNDEF(VT);
12224 
12225     if (NumDefs == 1) {
12226       assert(V.getNode() && "The single defined operand is empty!");
12227       SmallVector<SDValue, 8> Opnds;
12228       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
12229         if (i != Idx) {
12230           Opnds.push_back(DAG.getUNDEF(VTs[i]));
12231           continue;
12232         }
12233         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
12234         AddToWorklist(NV.getNode());
12235         Opnds.push_back(NV);
12236       }
12237       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
12238     }
12239   }
12240 
12241   // Fold truncate of a bitcast of a vector to an extract of the low vector
12242   // element.
12243   //
12244   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
12245   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
12246     SDValue VecSrc = N0.getOperand(0);
12247     EVT VecSrcVT = VecSrc.getValueType();
12248     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
12249         (!LegalOperations ||
12250          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
12251       SDLoc SL(N);
12252 
12253       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
12254       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
12255                          DAG.getVectorIdxConstant(Idx, SL));
12256     }
12257   }
12258 
12259   // Simplify the operands using demanded-bits information.
12260   if (SimplifyDemandedBits(SDValue(N, 0)))
12261     return SDValue(N, 0);
12262 
12263   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
12264   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
12265   // When the adde's carry is not used.
12266   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
12267       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do this for addcarry before operation legalization.
12269       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
12270        TLI.isOperationLegal(N0.getOpcode(), VT))) {
12271     SDLoc SL(N);
12272     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
12273     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
12274     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
12275     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
12276   }
12277 
12278   // fold (truncate (extract_subvector(ext x))) ->
12279   //      (extract_subvector x)
12280   // TODO: This can be generalized to cover cases where the truncate and extract
12281   // do not fully cancel each other out.
12282   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
12283     SDValue N00 = N0.getOperand(0);
12284     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
12285         N00.getOpcode() == ISD::ZERO_EXTEND ||
12286         N00.getOpcode() == ISD::ANY_EXTEND) {
12287       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
12288           VT.getVectorElementType())
12289         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
12290                            N00.getOperand(0), N0.getOperand(1));
12291     }
12292   }
12293 
12294   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12295     return NewVSel;
12296 
12297   // Narrow a suitable binary operation with a non-opaque constant operand by
12298   // moving it ahead of the truncate. This is limited to pre-legalization
12299   // because targets may prefer a wider type during later combines and invert
12300   // this transform.
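  // e.g. (i16 (trunc (add (i32 x), (i32 42)))) ->
  //      (add (i16 (trunc x)), (i16 42))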
12301   switch (N0.getOpcode()) {
12302   case ISD::ADD:
12303   case ISD::SUB:
12304   case ISD::MUL:
12305   case ISD::AND:
12306   case ISD::OR:
12307   case ISD::XOR:
12308     if (!LegalOperations && N0.hasOneUse() &&
12309         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12310          isConstantOrConstantVector(N0.getOperand(1), true))) {
12311       // TODO: We already restricted this to pre-legalization, but for vectors
12312       // we are extra cautious to not create an unsupported operation.
12313       // Target-specific changes are likely needed to avoid regressions here.
12314       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12315         SDLoc DL(N);
12316         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12317         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12318         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12319       }
12320     }
12321     break;
12322   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must ensure that we
    // don't introduce an extra truncate.
12326     if (!LegalOperations && N0.hasOneUse() &&
12327         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
12328         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
12329             VT.getScalarSizeInBits() &&
12330         hasOperation(N0.getOpcode(), VT)) {
12331       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
12332                                  DAG, SDLoc(N));
12333     }
12334     break;
12335   }
12336 
12337   return SDValue();
12338 }
12339 
12340 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12341   SDValue Elt = N->getOperand(i);
12342   if (Elt.getOpcode() != ISD::MERGE_VALUES)
12343     return Elt.getNode();
12344   return Elt.getOperand(Elt.getResNo()).getNode();
12345 }
12346 
12347 /// build_pair (load, load) -> load
12348 /// if load locations are consecutive.
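/// e.g. on a little-endian target:
///   (i64 (build_pair (i32 (load addr)), (i32 (load addr+4)))) ->
///   (i64 (load addr))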
12349 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12350   assert(N->getOpcode() == ISD::BUILD_PAIR);
12351 
12352   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12353   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12354 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
12358   if (DAG.getDataLayout().isBigEndian())
12359     std::swap(LD1, LD2);
12360 
12361   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
12362       LD1->getAddressSpace() != LD2->getAddressSpace())
12363     return SDValue();
12364   EVT LD1VT = LD1->getValueType(0);
12365   unsigned LD1Bytes = LD1VT.getStoreSize();
12366   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12367       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12368     Align Alignment = LD1->getAlign();
12369     Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12370         VT.getTypeForEVT(*DAG.getContext()));
12371 
12372     if (NewAlign <= Alignment &&
12373         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12374       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12375                          LD1->getPointerInfo(), Alignment);
12376   }
12377 
12378   return SDValue();
12379 }
12380 
12381 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12382   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12383   // and Lo parts; on big-endian machines it doesn't.
12384   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12385 }
12386 
12387 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12388                                     const TargetLowering &TLI) {
12389   // If this is not a bitcast to an FP type or if the target doesn't have
12390   // IEEE754-compliant FP logic, we're done.
12391   EVT VT = N->getValueType(0);
12392   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12393     return SDValue();
12394 
12395   // TODO: Handle cases where the integer constant is a different scalar
12396   // bitwidth to the FP.
12397   SDValue N0 = N->getOperand(0);
12398   EVT SourceVT = N0.getValueType();
12399   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12400     return SDValue();
12401 
12402   unsigned FPOpcode;
12403   APInt SignMask;
12404   switch (N0.getOpcode()) {
12405   case ISD::AND:
12406     FPOpcode = ISD::FABS;
12407     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12408     break;
12409   case ISD::XOR:
12410     FPOpcode = ISD::FNEG;
12411     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12412     break;
12413   case ISD::OR:
12414     FPOpcode = ISD::FABS;
12415     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12416     break;
12417   default:
12418     return SDValue();
12419   }
12420 
12421   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12422   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12423   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12424   //   fneg (fabs X)
12425   SDValue LogicOp0 = N0.getOperand(0);
12426   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12427   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12428       LogicOp0.getOpcode() == ISD::BITCAST &&
12429       LogicOp0.getOperand(0).getValueType() == VT) {
12430     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12431     NumFPLogicOpsConv++;
12432     if (N0.getOpcode() == ISD::OR)
12433       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12434     return FPOp;
12435   }
12436 
12437   return SDValue();
12438 }
12439 
12440 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12441   SDValue N0 = N->getOperand(0);
12442   EVT VT = N->getValueType(0);
12443 
12444   if (N0.isUndef())
12445     return DAG.getUNDEF(VT);
12446 
12447   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12448   // Only do this before legalize types, unless both types are integer and the
12449   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
12451   // First check to see if this is all constant.
12452   // TODO: Support FP bitcasts after legalize types.
12453   if (VT.isVector() &&
12454       (!LegalTypes ||
12455        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12456         TLI.isTypeLegal(VT.getVectorElementType()))) &&
12457       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12458       cast<BuildVectorSDNode>(N0)->isConstant())
12459     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12460                                              VT.getVectorElementType());
12461 
12462   // If the input is a constant, let getNode fold it.
12463   if (isIntOrFPConstant(N0)) {
12464     // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
12467     if (!LegalOperations ||
12468         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12469          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12470         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12471          TLI.isOperationLegal(ISD::Constant, VT))) {
12472       SDValue C = DAG.getBitcast(VT, N0);
12473       if (C.getNode() != N)
12474         return C;
12475     }
12476   }
12477 
12478   // (conv (conv x, t1), t2) -> (conv x, t2)
12479   if (N0.getOpcode() == ISD::BITCAST)
12480     return DAG.getBitcast(VT, N0.getOperand(0));
12481 
12482   // fold (conv (load x)) -> (load (conv*)x)
  // if the resultant load doesn't need a higher alignment than the original.
12484   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12485       // Do not remove the cast if the types differ in endian layout.
12486       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12487           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12488       // If the load is volatile, we only want to change the load type if the
12489       // resulting load is legal. Otherwise we might increase the number of
12490       // memory accesses. We don't care if the original type was legal or not
12491       // as we assume software couldn't rely on the number of accesses of an
12492       // illegal type.
12493       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12494        TLI.isOperationLegal(ISD::LOAD, VT))) {
12495     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12496 
12497     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12498                                     *LN0->getMemOperand())) {
12499       SDValue Load =
12500           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12501                       LN0->getPointerInfo(), LN0->getAlign(),
12502                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12503       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12504       return Load;
12505     }
12506   }
12507 
12508   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12509     return V;
12510 
12511   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12512   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12513   //
12514   // For ppc_fp128:
12515   // fold (bitcast (fneg x)) ->
12516   //     flipbit = signbit
12517   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12518   //
12519   // fold (bitcast (fabs x)) ->
12520   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
12521   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12522   // This often reduces constant pool loads.
12523   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12524        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12525       N0.getNode()->hasOneUse() && VT.isInteger() &&
12526       !VT.isVector() && !N0.getValueType().isVector()) {
12527     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12528     AddToWorklist(NewConv.getNode());
12529 
12530     SDLoc DL(N);
12531     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12532       assert(VT.getSizeInBits() == 128);
12533       SDValue SignBit = DAG.getConstant(
12534           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12535       SDValue FlipBit;
12536       if (N0.getOpcode() == ISD::FNEG) {
12537         FlipBit = SignBit;
12538         AddToWorklist(FlipBit.getNode());
12539       } else {
12540         assert(N0.getOpcode() == ISD::FABS);
12541         SDValue Hi =
12542             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12543                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12544                                               SDLoc(NewConv)));
12545         AddToWorklist(Hi.getNode());
12546         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12547         AddToWorklist(FlipBit.getNode());
12548       }
12549       SDValue FlipBits =
12550           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12551       AddToWorklist(FlipBits.getNode());
12552       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12553     }
12554     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12555     if (N0.getOpcode() == ISD::FNEG)
12556       return DAG.getNode(ISD::XOR, DL, VT,
12557                          NewConv, DAG.getConstant(SignBit, DL, VT));
12558     assert(N0.getOpcode() == ISD::FABS);
12559     return DAG.getNode(ISD::AND, DL, VT,
12560                        NewConv, DAG.getConstant(~SignBit, DL, VT));
12561   }
12562 
12563   // fold (bitconvert (fcopysign cst, x)) ->
12564   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
12565   // Note that we don't handle (copysign x, cst) because this can always be
12566   // folded to an fneg or fabs.
12567   //
12568   // For ppc_fp128:
12569   // fold (bitcast (fcopysign cst, x)) ->
12570   //     flipbit = (and (extract_element
12571   //                     (xor (bitcast cst), (bitcast x)), 0),
12572   //                    signbit)
12573   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
12574   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12575       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12576       VT.isInteger() && !VT.isVector()) {
12577     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12578     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12579     if (isTypeLegal(IntXVT)) {
12580       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12581       AddToWorklist(X.getNode());
12582 
12583       // If X has a different width than the result/lhs, sext it or truncate it.
12584       unsigned VTWidth = VT.getSizeInBits();
12585       if (OrigXWidth < VTWidth) {
12586         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12587         AddToWorklist(X.getNode());
12588       } else if (OrigXWidth > VTWidth) {
12589         // To get the sign bit in the right place, we have to shift it right
12590         // before truncating.
12591         SDLoc DL(X);
12592         X = DAG.getNode(ISD::SRL, DL,
12593                         X.getValueType(), X,
12594                         DAG.getConstant(OrigXWidth-VTWidth, DL,
12595                                         X.getValueType()));
12596         AddToWorklist(X.getNode());
12597         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12598         AddToWorklist(X.getNode());
12599       }
12600 
12601       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12602         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12603         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12604         AddToWorklist(Cst.getNode());
12605         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12606         AddToWorklist(X.getNode());
12607         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12608         AddToWorklist(XorResult.getNode());
12609         SDValue XorResult64 = DAG.getNode(
12610             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12611             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12612                                   SDLoc(XorResult)));
12613         AddToWorklist(XorResult64.getNode());
12614         SDValue FlipBit =
12615             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12616                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12617         AddToWorklist(FlipBit.getNode());
12618         SDValue FlipBits =
12619             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12620         AddToWorklist(FlipBits.getNode());
12621         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12622       }
12623       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12624       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12625                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
12626       AddToWorklist(X.getNode());
12627 
12628       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12629       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12630                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12631       AddToWorklist(Cst.getNode());
12632 
12633       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12634     }
12635   }
12636 
12637   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12638   if (N0.getOpcode() == ISD::BUILD_PAIR)
12639     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12640       return CombineLD;
12641 
12642   // Remove double bitcasts from shuffles - this is often a legacy of
12643   // XformToShuffleWithZero being used to combine bitmaskings (of
12644   // float vectors bitcast to integer vectors) into shuffles.
12645   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12646   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12647       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12648       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12649       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12650     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12651 
12652     // If operands are a bitcast, peek through if it casts the original VT.
12653     // If operands are a constant, just bitcast back to original VT.
12654     auto PeekThroughBitcast = [&](SDValue Op) {
12655       if (Op.getOpcode() == ISD::BITCAST &&
12656           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
12658       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12659           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12660         return DAG.getBitcast(VT, Op);
12661       return SDValue();
12662     };
12663 
12664     // FIXME: If either input vector is bitcast, try to convert the shuffle to
12665     // the result type of this bitcast. This would eliminate at least one
12666     // bitcast. See the transform in InstCombine.
12667     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12668     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12669     if (!(SV0 && SV1))
12670       return SDValue();
12671 
12672     int MaskScale =
12673         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12674     SmallVector<int, 8> NewMask;
12675     for (int M : SVN->getMask())
12676       for (int i = 0; i != MaskScale; ++i)
12677         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12678 
12679     SDValue LegalShuffle =
12680         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12681     if (LegalShuffle)
12682       return LegalShuffle;
12683   }
12684 
12685   return SDValue();
12686 }
12687 
12688 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12689   EVT VT = N->getValueType(0);
12690   return CombineConsecutiveLoads(N, VT);
12691 }
12692 
12693 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12694   SDValue N0 = N->getOperand(0);
12695 
12696   // (freeze (freeze x)) -> (freeze x)
12697   if (N0.getOpcode() == ISD::FREEZE)
12698     return N0;
12699 
12700   // If the input is a constant, return it.
12701   if (isIntOrFPConstant(N0))
12702     return N0;
12703 
12704   return SDValue();
12705 }
12706 
12707 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12708 /// operands. DstEltVT indicates the destination element value type.
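/// e.g. bitcasting (build_vector (i16 1), (i16 2)) to v4i8 yields, on a
/// little-endian target, (build_vector (i8 1), (i8 0), (i8 2), (i8 0)).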
12709 SDValue DAGCombiner::
12710 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12711   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12712 
12713   // If this is already the right type, we're done.
12714   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12715 
12716   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12717   unsigned DstBitSize = DstEltVT.getSizeInBits();
12718 
12719   // If this is a conversion of N elements of one type to N elements of another
12720   // type, convert each element.  This handles FP<->INT cases.
12721   if (SrcBitSize == DstBitSize) {
12722     SmallVector<SDValue, 8> Ops;
12723     for (SDValue Op : BV->op_values()) {
12724       // If the vector element type is not legal, the BUILD_VECTOR operands
12725       // are promoted and implicitly truncated.  Make that explicit here.
12726       if (Op.getValueType() != SrcEltVT)
12727         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12728       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12729       AddToWorklist(Ops.back().getNode());
12730     }
12731     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12732                               BV->getValueType(0).getVectorNumElements());
12733     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12734   }
12735 
12736   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12737   // handle annoying details of growing/shrinking FP values, we convert them to
12738   // int first.
12739   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector where the elements are
    // the same size.
12742     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12743     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12744     SrcEltVT = IntVT;
12745   }
12746 
12747   // Now we know the input is an integer vector.  If the output is a FP type,
12748   // convert to integer first, then to FP of the right size.
12749   if (DstEltVT.isFloatingPoint()) {
12750     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12751     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12752 
12753     // Next, convert to FP elements of the same size.
12754     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12755   }
12756 
12757   SDLoc DL(BV);
12758 
  // Okay, we know the src/dst types are both integers of differing sizes.
  // Handle growing first.
12761   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12762   if (SrcBitSize < DstBitSize) {
12763     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
12764 
12765     SmallVector<SDValue, 8> Ops;
12766     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12767          i += NumInputsPerOutput) {
12768       bool isLE = DAG.getDataLayout().isLittleEndian();
12769       APInt NewBits = APInt(DstBitSize, 0);
12770       bool EltIsUndef = true;
12771       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12772         // Shift the previously computed bits over.
12773         NewBits <<= SrcBitSize;
12774         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
12775         if (Op.isUndef()) continue;
12776         EltIsUndef = false;
12777 
12778         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12779                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12780       }
12781 
12782       if (EltIsUndef)
12783         Ops.push_back(DAG.getUNDEF(DstEltVT));
12784       else
12785         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12786     }
12787 
12788     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12789     return DAG.getBuildVector(VT, DL, Ops);
12790   }
12791 
12792   // Finally, this must be the case where we are shrinking elements: each input
12793   // turns into multiple outputs.
12794   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12795   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12796                             NumOutputsPerInput*BV->getNumOperands());
12797   SmallVector<SDValue, 8> Ops;
12798 
12799   for (const SDValue &Op : BV->op_values()) {
12800     if (Op.isUndef()) {
12801       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12802       continue;
12803     }
12804 
12805     APInt OpVal = cast<ConstantSDNode>(Op)->
12806                   getAPIntValue().zextOrTrunc(SrcBitSize);
12807 
12808     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12809       APInt ThisVal = OpVal.trunc(DstBitSize);
12810       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12811       OpVal.lshrInPlace(DstBitSize);
12812     }
12813 
12814     // For big endian targets, swap the order of the pieces of each element.
12815     if (DAG.getDataLayout().isBigEndian())
12816       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12817   }
12818 
12819   return DAG.getBuildVector(VT, DL, Ops);
12820 }
12821 
12822 static bool isContractable(SDNode *N) {
12823   SDNodeFlags F = N->getFlags();
12824   return F.hasAllowContract() || F.hasAllowReassociation();
12825 }
12826 
12827 /// Try to perform FMA combining on a given FADD node.
12828 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12829   SDValue N0 = N->getOperand(0);
12830   SDValue N1 = N->getOperand(1);
12831   EVT VT = N->getValueType(0);
12832   SDLoc SL(N);
12833 
12834   const TargetOptions &Options = DAG.getTarget().Options;
12835 
12836   // Floating-point multiply-add with intermediate rounding.
12837   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12838 
12839   // Floating-point multiply-add without intermediate rounding.
12840   bool HasFMA =
12841       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12842       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12843 
12844   // No valid opcode, do not combine.
12845   if (!HasFMAD && !HasFMA)
12846     return SDValue();
12847 
12848   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12849   bool CanReassociate =
12850       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
12851   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12852                               CanFuse || HasFMAD);
12853   // If the addition is not contractable, do not combine.
12854   if (!AllowFusionGlobally && !isContractable(N))
12855     return SDValue();
12856 
12857   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
12858     return SDValue();
12859 
12860   // Always prefer FMAD to FMA for precision.
12861   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12862   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12863 
12864   // Is the node an FMUL and contractable either due to global flags or
12865   // SDNodeFlags.
12866   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12867     if (N.getOpcode() != ISD::FMUL)
12868       return false;
12869     return AllowFusionGlobally || isContractable(N.getNode());
12870   };
12871   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12872   // prefer to fold the multiply with fewer uses.
12873   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12874     if (N0.getNode()->use_size() > N1.getNode()->use_size())
12875       std::swap(N0, N1);
12876   }
12877 
12878   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12879   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12880     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12881                        N0.getOperand(1), N1);
12882   }
12883 
12884   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12885   // Note: Commutes FADD operands.
12886   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12887     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
12888                        N1.getOperand(1), N0);
12889   }
12890 
12891   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12892   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12893   // This requires reassociation because it changes the order of operations.
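  // E.g. the original evaluates as ((A * B) + (C * D)) + E, while the result
  // evaluates as (A * B) + ((C * D) + E); the additions are reassociated.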
12894   SDValue FMA, E;
12895   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12896       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12897       N0.getOperand(2).hasOneUse()) {
12898     FMA = N0;
12899     E = N1;
12900   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12901              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12902              N1.getOperand(2).hasOneUse()) {
12903     FMA = N1;
12904     E = N0;
12905   }
12906   if (FMA && E) {
12907     SDValue A = FMA.getOperand(0);
12908     SDValue B = FMA.getOperand(1);
12909     SDValue C = FMA.getOperand(2).getOperand(0);
12910     SDValue D = FMA.getOperand(2).getOperand(1);
12911     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
12912     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
12913   }
12914 
12915   // Look through FP_EXTEND nodes to do more combining.
12916 
12917   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12918   if (N0.getOpcode() == ISD::FP_EXTEND) {
12919     SDValue N00 = N0.getOperand(0);
12920     if (isContractableFMUL(N00) &&
12921         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12922                             N00.getValueType())) {
12923       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12924                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12925                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12926                          N1);
12927     }
12928   }
12929 
12930   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12931   // Note: Commutes FADD operands.
12932   if (N1.getOpcode() == ISD::FP_EXTEND) {
12933     SDValue N10 = N1.getOperand(0);
12934     if (isContractableFMUL(N10) &&
12935         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12936                             N10.getValueType())) {
12937       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12938                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
12939                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
12940                          N0);
12941     }
12942   }
12943 
12944   // More folding opportunities when target permits.
12945   if (Aggressive) {
12946     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12947     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
12948     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12949                                     SDValue Z) {
12950       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12951                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12952                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12953                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12954                                      Z));
12955     };
12956     if (N0.getOpcode() == PreferredFusedOpcode) {
12957       SDValue N02 = N0.getOperand(2);
12958       if (N02.getOpcode() == ISD::FP_EXTEND) {
12959         SDValue N020 = N02.getOperand(0);
12960         if (isContractableFMUL(N020) &&
12961             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12962                                 N020.getValueType())) {
12963           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12964                                       N020.getOperand(0), N020.getOperand(1),
12965                                       N1);
12966         }
12967       }
12968     }
12969 
12970     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12971     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12972     // FIXME: This turns two single-precision and one double-precision
12973     // operation into two double-precision operations, which might not be
12974     // interesting for all targets, especially GPUs.
12975     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12976                                     SDValue Z) {
12977       return DAG.getNode(
12978           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12979           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12980           DAG.getNode(PreferredFusedOpcode, SL, VT,
12981                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12982                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
12983     };
12984     if (N0.getOpcode() == ISD::FP_EXTEND) {
12985       SDValue N00 = N0.getOperand(0);
12986       if (N00.getOpcode() == PreferredFusedOpcode) {
12987         SDValue N002 = N00.getOperand(2);
12988         if (isContractableFMUL(N002) &&
12989             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12990                                 N00.getValueType())) {
12991           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12992                                       N002.getOperand(0), N002.getOperand(1),
12993                                       N1);
12994         }
12995       }
12996     }
12997 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
12999     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
13000     if (N1.getOpcode() == PreferredFusedOpcode) {
13001       SDValue N12 = N1.getOperand(2);
13002       if (N12.getOpcode() == ISD::FP_EXTEND) {
13003         SDValue N120 = N12.getOperand(0);
13004         if (isContractableFMUL(N120) &&
13005             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13006                                 N120.getValueType())) {
13007           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
13008                                       N120.getOperand(0), N120.getOperand(1),
13009                                       N0);
13010         }
13011       }
13012     }
13013 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
13015     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
13016     // FIXME: This turns two single-precision and one double-precision
13017     // operation into two double-precision operations, which might not be
13018     // interesting for all targets, especially GPUs.
13019     if (N1.getOpcode() == ISD::FP_EXTEND) {
13020       SDValue N10 = N1.getOperand(0);
13021       if (N10.getOpcode() == PreferredFusedOpcode) {
13022         SDValue N102 = N10.getOperand(2);
13023         if (isContractableFMUL(N102) &&
13024             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13025                                 N10.getValueType())) {
13026           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
13027                                       N102.getOperand(0), N102.getOperand(1),
13028                                       N0);
13029         }
13030       }
13031     }
13032   }
13033 
13034   return SDValue();
13035 }
13036 
13037 /// Try to perform FMA combining on a given FSUB node.
13038 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
13039   SDValue N0 = N->getOperand(0);
13040   SDValue N1 = N->getOperand(1);
13041   EVT VT = N->getValueType(0);
13042   SDLoc SL(N);
13043 
13044   const TargetOptions &Options = DAG.getTarget().Options;
13045   // Floating-point multiply-add with intermediate rounding.
13046   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
13047 
13048   // Floating-point multiply-add without intermediate rounding.
13049   bool HasFMA =
13050       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13051       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13052 
13053   // No valid opcode, do not combine.
13054   if (!HasFMAD && !HasFMA)
13055     return SDValue();
13056 
13057   const SDNodeFlags Flags = N->getFlags();
13058   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
13059   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
13060                               CanFuse || HasFMAD);
13061 
13062   // If the subtraction is not contractable, do not combine.
13063   if (!AllowFusionGlobally && !isContractable(N))
13064     return SDValue();
13065 
13066   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
13067     return SDValue();
13068 
13069   // Always prefer FMAD to FMA for precision.
13070   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13071   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13072   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
13073 
  // Returns true if the node is an FMUL that is contractable either due to
  // global flags or its own SDNodeFlags.
13076   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
13077     if (N.getOpcode() != ISD::FMUL)
13078       return false;
13079     return AllowFusionGlobally || isContractable(N.getNode());
13080   };
13081 
13082   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13083   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
13084     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
13085       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
13086                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
13087     }
13088     return SDValue();
13089   };
13090 
13091   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13092   // Note: Commutes FSUB operands.
13093   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
13094     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
13095       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13096                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
13097                          YZ.getOperand(1), X);
13098     }
13099     return SDValue();
13100   };
13101 
13102   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
13103   // prefer to fold the multiply with fewer uses.
13104   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
13105       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
13106     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
13107     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13108       return V;
13109     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
13110     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13111       return V;
13112   } else {
13113     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
13114     if (SDValue V = tryToFoldXYSubZ(N0, N1))
13115       return V;
13116     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13117     if (SDValue V = tryToFoldXSubYZ(N0, N1))
13118       return V;
13119   }
13120 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
13122   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
13123       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
13124     SDValue N00 = N0.getOperand(0).getOperand(0);
13125     SDValue N01 = N0.getOperand(0).getOperand(1);
13126     return DAG.getNode(PreferredFusedOpcode, SL, VT,
13127                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
13128                        DAG.getNode(ISD::FNEG, SL, VT, N1));
13129   }
13130 
13131   // Look through FP_EXTEND nodes to do more combining.
13132 
13133   // fold (fsub (fpext (fmul x, y)), z)
13134   //   -> (fma (fpext x), (fpext y), (fneg z))
13135   if (N0.getOpcode() == ISD::FP_EXTEND) {
13136     SDValue N00 = N0.getOperand(0);
13137     if (isContractableFMUL(N00) &&
13138         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13139                             N00.getValueType())) {
13140       return DAG.getNode(PreferredFusedOpcode, SL, VT,
13141                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13142                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13143                          DAG.getNode(ISD::FNEG, SL, VT, N1));
13144     }
13145   }
13146 
13147   // fold (fsub x, (fpext (fmul y, z)))
13148   //   -> (fma (fneg (fpext y)), (fpext z), x)
13149   // Note: Commutes FSUB operands.
13150   if (N1.getOpcode() == ISD::FP_EXTEND) {
13151     SDValue N10 = N1.getOperand(0);
13152     if (isContractableFMUL(N10) &&
13153         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13154                             N10.getValueType())) {
13155       return DAG.getNode(
13156           PreferredFusedOpcode, SL, VT,
13157           DAG.getNode(ISD::FNEG, SL, VT,
13158                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
13159           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
13160     }
13161   }
13162 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
13169   if (N0.getOpcode() == ISD::FP_EXTEND) {
13170     SDValue N00 = N0.getOperand(0);
13171     if (N00.getOpcode() == ISD::FNEG) {
13172       SDValue N000 = N00.getOperand(0);
13173       if (isContractableFMUL(N000) &&
13174           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13175                               N00.getValueType())) {
13176         return DAG.getNode(
13177             ISD::FNEG, SL, VT,
13178             DAG.getNode(PreferredFusedOpcode, SL, VT,
13179                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13180                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13181                         N1));
13182       }
13183     }
13184   }
13185 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
  // us from implementing the canonicalization in visitFSUB.
13192   if (N0.getOpcode() == ISD::FNEG) {
13193     SDValue N00 = N0.getOperand(0);
13194     if (N00.getOpcode() == ISD::FP_EXTEND) {
13195       SDValue N000 = N00.getOperand(0);
13196       if (isContractableFMUL(N000) &&
13197           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13198                               N000.getValueType())) {
13199         return DAG.getNode(
13200             ISD::FNEG, SL, VT,
13201             DAG.getNode(PreferredFusedOpcode, SL, VT,
13202                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
13203                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
13204                         N1));
13205       }
13206     }
13207   }
13208 
13209   // More folding opportunities when target permits.
13210   if (Aggressive) {
13211     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
13213     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
13214         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
13215         N0.getOperand(2)->hasOneUse()) {
13216       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
13217                          N0.getOperand(1),
13218                          DAG.getNode(PreferredFusedOpcode, SL, VT,
13219                                      N0.getOperand(2).getOperand(0),
13220                                      N0.getOperand(2).getOperand(1),
13221                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
13222     }
13223 
13224     // fold (fsub x, (fma y, z, (fmul u, v)))
13225     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
13226     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
13227         isContractableFMUL(N1.getOperand(2)) &&
13228         N1->hasOneUse() && NoSignedZero) {
13229       SDValue N20 = N1.getOperand(2).getOperand(0);
13230       SDValue N21 = N1.getOperand(2).getOperand(1);
13231       return DAG.getNode(
13232           PreferredFusedOpcode, SL, VT,
13233           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13234           DAG.getNode(PreferredFusedOpcode, SL, VT,
13235                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
    }

13239     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
    if (N0.getOpcode() == PreferredFusedOpcode && N0->hasOneUse()) {
13243       SDValue N02 = N0.getOperand(2);
13244       if (N02.getOpcode() == ISD::FP_EXTEND) {
13245         SDValue N020 = N02.getOperand(0);
13246         if (isContractableFMUL(N020) &&
13247             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13248                                 N020.getValueType())) {
13249           return DAG.getNode(
13250               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
13251               DAG.getNode(
13252                   PreferredFusedOpcode, SL, VT,
13253                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
13254                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
13255                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13256         }
13257       }
13258     }
13259 
13260     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
13261     //   -> (fma (fpext x), (fpext y),
13262     //           (fma (fpext u), (fpext v), (fneg z)))
13263     // FIXME: This turns two single-precision and one double-precision
13264     // operation into two double-precision operations, which might not be
13265     // interesting for all targets, especially GPUs.
13266     if (N0.getOpcode() == ISD::FP_EXTEND) {
13267       SDValue N00 = N0.getOperand(0);
13268       if (N00.getOpcode() == PreferredFusedOpcode) {
13269         SDValue N002 = N00.getOperand(2);
13270         if (isContractableFMUL(N002) &&
13271             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13272                                 N00.getValueType())) {
13273           return DAG.getNode(
13274               PreferredFusedOpcode, SL, VT,
13275               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
13276               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
13277               DAG.getNode(
13278                   PreferredFusedOpcode, SL, VT,
13279                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
13280                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
13281                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
13282         }
13283       }
13284     }
13285 
13286     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
13287     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
13288     if (N1.getOpcode() == PreferredFusedOpcode &&
13289         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
13290         N1->hasOneUse()) {
13291       SDValue N120 = N1.getOperand(2).getOperand(0);
13292       if (isContractableFMUL(N120) &&
13293           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13294                               N120.getValueType())) {
13295         SDValue N1200 = N120.getOperand(0);
13296         SDValue N1201 = N120.getOperand(1);
13297         return DAG.getNode(
13298             PreferredFusedOpcode, SL, VT,
13299             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
13300             DAG.getNode(PreferredFusedOpcode, SL, VT,
13301                         DAG.getNode(ISD::FNEG, SL, VT,
13302                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
13303                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
13304       }
13305     }
13306 
13307     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
13308     //   -> (fma (fneg (fpext y)), (fpext z),
13309     //           (fma (fneg (fpext u)), (fpext v), x))
13310     // FIXME: This turns two single-precision and one double-precision
13311     // operation into two double-precision operations, which might not be
13312     // interesting for all targets, especially GPUs.
13313     if (N1.getOpcode() == ISD::FP_EXTEND &&
13314         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13315       SDValue CvtSrc = N1.getOperand(0);
13316       SDValue N100 = CvtSrc.getOperand(0);
13317       SDValue N101 = CvtSrc.getOperand(1);
13318       SDValue N102 = CvtSrc.getOperand(2);
13319       if (isContractableFMUL(N102) &&
13320           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13321                               CvtSrc.getValueType())) {
13322         SDValue N1020 = N102.getOperand(0);
13323         SDValue N1021 = N102.getOperand(1);
13324         return DAG.getNode(
13325             PreferredFusedOpcode, SL, VT,
13326             DAG.getNode(ISD::FNEG, SL, VT,
13327                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13328             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13329             DAG.getNode(PreferredFusedOpcode, SL, VT,
13330                         DAG.getNode(ISD::FNEG, SL, VT,
13331                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13332                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13333       }
13334     }
13335   }
13336 
13337   return SDValue();
13338 }
13339 
13340 /// Try to perform FMA combining on a given FMUL node based on the distributive
13341 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
13342 /// subtraction instead of addition).
13343 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13344   SDValue N0 = N->getOperand(0);
13345   SDValue N1 = N->getOperand(1);
13346   EVT VT = N->getValueType(0);
13347   SDLoc SL(N);
13348 
13349   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13350 
13351   const TargetOptions &Options = DAG.getTarget().Options;
13352 
13353   // The transforms below are incorrect when x == 0 and y == inf, because the
13354   // intermediate multiplication produces a nan.
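  // E.g. with x == 0 and y == inf, (fmul (fadd x, 1.0), y) is
  // (0 + 1) * inf == inf, but (fma x, y, y) is 0 * inf + inf == NaN.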
13355   if (!Options.NoInfsFPMath)
13356     return SDValue();
13357 
13358   // Floating-point multiply-add without intermediate rounding.
13359   bool HasFMA =
13360       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13361       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13362       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13363 
13364   // Floating-point multiply-add with intermediate rounding. This can result
13365   // in a less precise result due to the changed rounding order.
13366   bool HasFMAD = Options.UnsafeFPMath &&
13367                  (LegalOperations && TLI.isFMADLegal(DAG, N));
13368 
13369   // No valid opcode, do not combine.
13370   if (!HasFMAD && !HasFMA)
13371     return SDValue();
13372 
13373   // Always prefer FMAD to FMA for precision.
13374   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13375   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13376 
13377   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13378   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
13379   auto FuseFADD = [&](SDValue X, SDValue Y) {
13380     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13381       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13382         if (C->isExactlyValue(+1.0))
13383           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13384                              Y);
13385         if (C->isExactlyValue(-1.0))
13386           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13387                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13388       }
13389     }
13390     return SDValue();
13391   };
13392 
13393   if (SDValue FMA = FuseFADD(N0, N1))
13394     return FMA;
13395   if (SDValue FMA = FuseFADD(N1, N0))
13396     return FMA;
13397 
13398   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13399   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13400   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13401   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
13402   auto FuseFSUB = [&](SDValue X, SDValue Y) {
13403     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13404       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13405         if (C0->isExactlyValue(+1.0))
13406           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13407                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13408                              Y);
13409         if (C0->isExactlyValue(-1.0))
13410           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13411                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13412                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13413       }
13414       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13415         if (C1->isExactlyValue(+1.0))
13416           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13417                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13418         if (C1->isExactlyValue(-1.0))
13419           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13420                              Y);
13421       }
13422     }
13423     return SDValue();
13424   };
13425 
13426   if (SDValue FMA = FuseFSUB(N0, N1))
13427     return FMA;
13428   if (SDValue FMA = FuseFSUB(N1, N0))
13429     return FMA;
13430 
13431   return SDValue();
13432 }
13433 
13434 SDValue DAGCombiner::visitFADD(SDNode *N) {
13435   SDValue N0 = N->getOperand(0);
13436   SDValue N1 = N->getOperand(1);
13437   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
13438   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
13439   EVT VT = N->getValueType(0);
13440   SDLoc DL(N);
13441   const TargetOptions &Options = DAG.getTarget().Options;
13442   SDNodeFlags Flags = N->getFlags();
13443   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13444 
13445   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13446     return R;
13447 
13448   // fold vector ops
13449   if (VT.isVector())
13450     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13451       return FoldedVOp;
13452 
13453   // fold (fadd c1, c2) -> c1 + c2
13454   if (N0CFP && N1CFP)
13455     return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13456 
13457   // canonicalize constant to RHS
13458   if (N0CFP && !N1CFP)
13459     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13460 
13461   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
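  // (x + -0.0 == x holds for every x, including +0.0; but x + 0.0 flips the
  // sign when x == -0.0, so folding away +0.0 needs no-signed-zeros.)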
13462   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13463   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
13465       return N0;
13466 
13467   if (SDValue NewSel = foldBinOpIntoSelect(N))
13468     return NewSel;
13469 
13470   // fold (fadd A, (fneg B)) -> (fsub A, B)
13471   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13472     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13473             N1, DAG, LegalOperations, ForCodeSize))
13474       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13475 
13476   // fold (fadd (fneg A), B) -> (fsub B, A)
13477   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13478     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13479             N0, DAG, LegalOperations, ForCodeSize))
13480       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13481 
13482   auto isFMulNegTwo = [](SDValue FMul) {
13483     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13484       return false;
13485     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13486     return C && C->isExactlyValue(-2.0);
13487   };
13488 
13489   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
13490   if (isFMulNegTwo(N0)) {
13491     SDValue B = N0.getOperand(0);
13492     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13493     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13494   }
13495   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13496   if (isFMulNegTwo(N1)) {
13497     SDValue B = N1.getOperand(0);
13498     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13499     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13500   }
13501 
  // No FP constant should be created after legalization as the Instruction
  // Selection pass has a hard time dealing with FP constants.
13504   bool AllowNewConst = (Level < AfterLegalizeDAG);
13505 
13506   // If nnan is enabled, fold lots of things.
13507   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13508     // If allowed, fold (fadd (fneg x), x) -> 0.0
13509     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13510       return DAG.getConstantFP(0.0, DL, VT);
13511 
13512     // If allowed, fold (fadd x, (fneg x)) -> 0.0
13513     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13514       return DAG.getConstantFP(0.0, DL, VT);
13515   }
13516 
  // If 'unsafe math' is enabled or we have the reassoc and nsz flags, fold
  // lots of things.
13518   // TODO: break out portions of the transformations below for which Unsafe is
13519   //       considered and which do not require both nsz and reassoc
13520   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13521        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13522       AllowNewConst) {
13523     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13524     if (N1CFP && N0.getOpcode() == ISD::FADD &&
13525         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13526       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13527       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13528     }
13529 
13530     // We can fold chains of FADD's of the same value into multiplications.
13531     // This transform is not safe in general because we are reducing the number
13532     // of rounding steps.
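    // E.g. folding (fadd (fmul x, c), x) to (fmul x, c + 1.0) replaces a
    // rounded multiply and a rounded add with a single multiply by the
    // separately rounded constant c + 1.0.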
13533     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13534       if (N0.getOpcode() == ISD::FMUL) {
13535         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13536         bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13537 
13538         // (fadd (fmul x, c), x) -> (fmul x, c+1)
13539         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13540           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13541                                        DAG.getConstantFP(1.0, DL, VT));
13542           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13543         }
13544 
13545         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13546         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13547             N1.getOperand(0) == N1.getOperand(1) &&
13548             N0.getOperand(0) == N1.getOperand(0)) {
13549           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13550                                        DAG.getConstantFP(2.0, DL, VT));
13551           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13552         }
13553       }
13554 
13555       if (N1.getOpcode() == ISD::FMUL) {
13556         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13557         bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13558 
13559         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13560         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13561           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13562                                        DAG.getConstantFP(1.0, DL, VT));
13563           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13564         }
13565 
13566         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13567         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13568             N0.getOperand(0) == N0.getOperand(1) &&
13569             N1.getOperand(0) == N0.getOperand(0)) {
13570           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13571                                        DAG.getConstantFP(2.0, DL, VT));
13572           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13573         }
13574       }
13575 
13576       if (N0.getOpcode() == ISD::FADD) {
13577         bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13578         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13579         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13580             (N0.getOperand(0) == N1)) {
13581           return DAG.getNode(ISD::FMUL, DL, VT, N1,
13582                              DAG.getConstantFP(3.0, DL, VT));
13583         }
13584       }
13585 
13586       if (N1.getOpcode() == ISD::FADD) {
13587         bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13588         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13589         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13590             N1.getOperand(0) == N0) {
13591           return DAG.getNode(ISD::FMUL, DL, VT, N0,
13592                              DAG.getConstantFP(3.0, DL, VT));
13593         }
13594       }
13595 
13596       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13597       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13598           N0.getOperand(0) == N0.getOperand(1) &&
13599           N1.getOperand(0) == N1.getOperand(1) &&
13600           N0.getOperand(0) == N1.getOperand(0)) {
13601         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13602                            DAG.getConstantFP(4.0, DL, VT));
13603       }
13604     }
13605   } // enable-unsafe-fp-math
13606 
13607   // FADD -> FMA combines:
13608   if (SDValue Fused = visitFADDForFMACombine(N)) {
13609     AddToWorklist(Fused.getNode());
13610     return Fused;
13611   }
13612   return SDValue();
13613 }
13614 
13615 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13616   SDValue Chain = N->getOperand(0);
13617   SDValue N0 = N->getOperand(1);
13618   SDValue N1 = N->getOperand(2);
13619   EVT VT = N->getValueType(0);
13620   EVT ChainVT = N->getValueType(1);
13621   SDLoc DL(N);
13622   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13623 
13624   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13625   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13626     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13627             N1, DAG, LegalOperations, ForCodeSize)) {
13628       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13629                          {Chain, N0, NegN1});
13630     }
13631 
13632   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13633   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13634     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13635             N0, DAG, LegalOperations, ForCodeSize)) {
13636       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13637                          {Chain, N1, NegN0});
13638     }
13639   return SDValue();
13640 }
13641 
13642 SDValue DAGCombiner::visitFSUB(SDNode *N) {
13643   SDValue N0 = N->getOperand(0);
13644   SDValue N1 = N->getOperand(1);
13645   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13646   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13647   EVT VT = N->getValueType(0);
13648   SDLoc DL(N);
13649   const TargetOptions &Options = DAG.getTarget().Options;
13650   const SDNodeFlags Flags = N->getFlags();
13651   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13652 
13653   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13654     return R;
13655 
13656   // fold vector ops
13657   if (VT.isVector())
13658     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13659       return FoldedVOp;
13660 
13661   // fold (fsub c1, c2) -> c1-c2
13662   if (N0CFP && N1CFP)
13663     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13664 
13665   if (SDValue NewSel = foldBinOpIntoSelect(N))
13666     return NewSel;
13667 
  // (fsub A, 0) -> A; subtracting -0.0 additionally requires nsz.
13669   if (N1CFP && N1CFP->isZero()) {
13670     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13671         Flags.hasNoSignedZeros()) {
13672       return N0;
13673     }
13674   }
13675 
13676   if (N0 == N1) {
13677     // (fsub x, x) -> 0.0
13678     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13679       return DAG.getConstantFP(0.0f, DL, VT);
13680   }
13681 
13682   // (fsub -0.0, N1) -> -N1
13683   if (N0CFP && N0CFP->isZero()) {
13684     if (N0CFP->isNegative() ||
13685         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13686       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
13687       // flushed to zero, unless all users treat denorms as zero (DAZ).
13688       // FIXME: This transform will change the sign of a NaN and the behavior
13689       // of a signaling NaN. It is only valid when a NoNaN flag is present.
13690       DenormalMode DenormMode = DAG.getDenormalMode(VT);
13691       if (DenormMode == DenormalMode::getIEEE()) {
13692         if (SDValue NegN1 =
13693                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13694           return NegN1;
13695         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13696           return DAG.getNode(ISD::FNEG, DL, VT, N1);
13697       }
13698     }
13699   }
13700 
13701   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13702        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13703       N1.getOpcode() == ISD::FADD) {
13704     // X - (X + Y) -> -Y
13705     if (N0 == N1->getOperand(0))
13706       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13707     // X - (Y + X) -> -Y
13708     if (N0 == N1->getOperand(1))
13709       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13710   }
13711 
13712   // fold (fsub A, (fneg B)) -> (fadd A, B)
13713   if (SDValue NegN1 =
13714           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13715     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13716 
13717   // FSUB -> FMA combines:
13718   if (SDValue Fused = visitFSUBForFMACombine(N)) {
13719     AddToWorklist(Fused.getNode());
13720     return Fused;
13721   }
13722 
13723   return SDValue();
13724 }
13725 
13726 SDValue DAGCombiner::visitFMUL(SDNode *N) {
13727   SDValue N0 = N->getOperand(0);
13728   SDValue N1 = N->getOperand(1);
13729   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13730   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13731   EVT VT = N->getValueType(0);
13732   SDLoc DL(N);
13733   const TargetOptions &Options = DAG.getTarget().Options;
13734   const SDNodeFlags Flags = N->getFlags();
13735   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13736 
13737   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13738     return R;
13739 
13740   // fold vector ops
13741   if (VT.isVector()) {
13742     // This just handles C1 * C2 for vectors. Other vector folds are below.
13743     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13744       return FoldedVOp;
13745   }
13746 
13747   // fold (fmul c1, c2) -> c1*c2
13748   if (N0CFP && N1CFP)
13749     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13750 
13751   // canonicalize constant to RHS
13752   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13753      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13754     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13755 
13756   if (SDValue NewSel = foldBinOpIntoSelect(N))
13757     return NewSel;
13758 
13759   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13760     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13761     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13762         N0.getOpcode() == ISD::FMUL) {
13763       SDValue N00 = N0.getOperand(0);
13764       SDValue N01 = N0.getOperand(1);
13765       // Avoid an infinite loop by making sure that N00 is not a constant
13766       // (the inner multiply has not been constant folded yet).
13767       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
13768           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
13769         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13770         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13771       }
13772     }
13773 
    // Match a special case: we convert X * 2.0 into fadd.
13775     // fmul (fadd X, X), C -> fmul X, 2.0 * C
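    // E.g. (fmul (fadd X, X), 3.0) becomes (fmul X, 6.0).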
13776     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13777         N0.getOperand(0) == N0.getOperand(1)) {
13778       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13779       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13780       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13781     }
13782   }
13783 
13784   // fold (fmul X, 2.0) -> (fadd X, X)
13785   if (N1CFP && N1CFP->isExactlyValue(+2.0))
13786     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13787 
13788   // fold (fmul X, -1.0) -> (fneg X)
13789   if (N1CFP && N1CFP->isExactlyValue(-1.0))
13790     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13791       return DAG.getNode(ISD::FNEG, DL, VT, N0);
13792 
13793   // -N0 * -N1 --> N0 * N1
13794   TargetLowering::NegatibleCost CostN0 =
13795       TargetLowering::NegatibleCost::Expensive;
13796   TargetLowering::NegatibleCost CostN1 =
13797       TargetLowering::NegatibleCost::Expensive;
13798   SDValue NegN0 =
13799       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13800   SDValue NegN1 =
13801       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13802   if (NegN0 && NegN1 &&
13803       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13804        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13805     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
13806 
13807   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
13808   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
13809   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
13810       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
13811       TLI.isOperationLegal(ISD::FABS, VT)) {
13812     SDValue Select = N0, X = N1;
13813     if (Select.getOpcode() != ISD::SELECT)
13814       std::swap(Select, X);
13815 
13816     SDValue Cond = Select.getOperand(0);
13817     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
13818     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
13819 
13820     if (TrueOpnd && FalseOpnd &&
13821         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
13822         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
13823         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
13824       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13825       switch (CC) {
13826       default: break;
13827       case ISD::SETOLT:
13828       case ISD::SETULT:
13829       case ISD::SETOLE:
13830       case ISD::SETULE:
13831       case ISD::SETLT:
13832       case ISD::SETLE:
13833         std::swap(TrueOpnd, FalseOpnd);
13834         LLVM_FALLTHROUGH;
13835       case ISD::SETOGT:
13836       case ISD::SETUGT:
13837       case ISD::SETOGE:
13838       case ISD::SETUGE:
13839       case ISD::SETGT:
13840       case ISD::SETGE:
13841         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
13842             TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT,
                             DAG.getNode(ISD::FABS, DL, VT, X));
13845         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
13846           return DAG.getNode(ISD::FABS, DL, VT, X);
13847 
13848         break;
13849       }
13850     }
13851   }
13852 
13853   // FMUL -> FMA combines:
13854   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13855     AddToWorklist(Fused.getNode());
13856     return Fused;
13857   }
13858 
13859   return SDValue();
13860 }
13861 
13862 SDValue DAGCombiner::visitFMA(SDNode *N) {
13863   SDValue N0 = N->getOperand(0);
13864   SDValue N1 = N->getOperand(1);
13865   SDValue N2 = N->getOperand(2);
13866   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13867   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13868   EVT VT = N->getValueType(0);
13869   SDLoc DL(N);
13870   const TargetOptions &Options = DAG.getTarget().Options;
13871   // FMA nodes have flags that propagate to the created nodes.
13872   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13873 
13874   bool UnsafeFPMath =
13875       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13876 
13877   // Constant fold FMA.
13878   if (isa<ConstantFPSDNode>(N0) &&
13879       isa<ConstantFPSDNode>(N1) &&
13880       isa<ConstantFPSDNode>(N2)) {
13881     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13882   }
13883 
13884   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13885   TargetLowering::NegatibleCost CostN0 =
13886       TargetLowering::NegatibleCost::Expensive;
13887   TargetLowering::NegatibleCost CostN1 =
13888       TargetLowering::NegatibleCost::Expensive;
13889   SDValue NegN0 =
13890       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13891   SDValue NegN1 =
13892       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13893   if (NegN0 && NegN1 &&
13894       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13895        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13896     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
13897 
13898   if (UnsafeFPMath) {
13899     if (N0CFP && N0CFP->isZero())
13900       return N2;
13901     if (N1CFP && N1CFP->isZero())
13902       return N2;
13903   }
13904 
13905   if (N0CFP && N0CFP->isExactlyValue(1.0))
13906     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13907   if (N1CFP && N1CFP->isExactlyValue(1.0))
13908     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13909 
13910   // Canonicalize (fma c, x, y) -> (fma x, c, y)
13911   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
13912      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
13913     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13914 
13915   if (UnsafeFPMath) {
13916     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
13917     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13918         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13919         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13920       return DAG.getNode(ISD::FMUL, DL, VT, N0,
13921                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
13922     }
13923 
13924     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13925     if (N0.getOpcode() == ISD::FMUL &&
13926         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
13927         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13928       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13929                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
13930                          N2);
13931     }
13932   }
13933 
  // (fma x, 1.0, y) -> (fadd x, y)
  // (fma x, -1.0, y) -> (fadd (fneg x), y)
13935   if (N1CFP) {
13936     if (N1CFP->isExactlyValue(1.0))
13937       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13938 
13939     if (N1CFP->isExactlyValue(-1.0) &&
13940         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13941       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13942       AddToWorklist(RHSNeg.getNode());
13943       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13944     }
13945 
    // fma (fneg x), K, y -> fma x, -K, y
13947     if (N0.getOpcode() == ISD::FNEG &&
13948         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13949          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13950                                               ForCodeSize)))) {
13951       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13952                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
13953     }
13954   }
13955 
13956   if (UnsafeFPMath) {
13957     // (fma x, c, x) -> (fmul x, (c+1))
13958     if (N1CFP && N0 == N2) {
13959       return DAG.getNode(
13960           ISD::FMUL, DL, VT, N0,
13961           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
13962     }
13963 
13964     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13965     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13966       return DAG.getNode(
13967           ISD::FMUL, DL, VT, N0,
13968           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
13969     }
13970   }
13971 
13972   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
13973   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
13974   if (!TLI.isFNegFree(VT))
13975     if (SDValue Neg = TLI.getCheaperNegatedExpression(
13976             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13977       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
13978   return SDValue();
13979 }
13980 
13981 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13982 // reciprocal.
13983 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is that different
// targets may have different costs for FDIV and FMUL, so sometimes the cost
// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
// reason is that the critical path grows from "one FDIV" to "one FDIV + one
// FMUL".
13988 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13989   // TODO: Limit this transform based on optsize/minsize - it always creates at
13990   //       least 1 extra instruction. But the perf win may be substantial enough
13991   //       that only minsize should restrict this.
13992   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13993   const SDNodeFlags Flags = N->getFlags();
13994   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13995     return SDValue();
13996 
13997   // Skip if current node is a reciprocal/fneg-reciprocal.
13998   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13999   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
14000   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
14001     return SDValue();
14002 
14003   // Exit early if the target does not want this transform or if there can't
14004   // possibly be enough uses of the divisor to make the transform worthwhile.
14005   unsigned MinUses = TLI.combineRepeatedFPDivisors();
14006 
14007   // For splat vectors, scale the number of uses by the splat factor. If we can
14008   // convert the division into a scalar op, that will likely be much faster.
14009   unsigned NumElts = 1;
14010   EVT VT = N->getValueType(0);
14011   if (VT.isVector() && DAG.isSplatValue(N1))
14012     NumElts = VT.getVectorNumElements();
14013 
14014   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
14015     return SDValue();
14016 
14017   // Find all FDIV users of the same divisor.
14018   // Use a set because duplicates may be present in the user list.
14019   SetVector<SDNode *> Users;
14020   for (auto *U : N1->uses()) {
14021     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
14022       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
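      // (X / sqrt(X) folds to sqrt(X) directly, which is better than a
      // reciprocal multiply, so leave it for that combine.)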
14023       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
14024           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
14025           U->getFlags().hasAllowReassociation() &&
14026           U->getFlags().hasNoSignedZeros())
14027         continue;
14028 
14029       // This division is eligible for optimization only if global unsafe math
14030       // is enabled or if this division allows reciprocal formation.
14031       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
14032         Users.insert(U);
14033     }
14034   }
14035 
14036   // Now that we have the actual number of divisor uses, make sure it meets
14037   // the minimum threshold specified by the target.
14038   if ((Users.size() * NumElts) < MinUses)
14039     return SDValue();
14040 
14041   SDLoc DL(N);
14042   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
14043   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
14044 
14045   // Dividend / Divisor -> Dividend * Reciprocal
14046   for (auto *U : Users) {
14047     SDValue Dividend = U->getOperand(0);
14048     if (Dividend != FPOne) {
14049       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
14050                                     Reciprocal, Flags);
14051       CombineTo(U, NewNode);
14052     } else if (U != Reciprocal.getNode()) {
14053       // In the absence of fast-math-flags, this user node is always the
14054       // same node as Reciprocal, but with FMF they may be different nodes.
14055       CombineTo(U, Reciprocal);
14056     }
14057   }
14058   return SDValue(N, 0);  // N was replaced.
14059 }
14060 
14061 SDValue DAGCombiner::visitFDIV(SDNode *N) {
14062   SDValue N0 = N->getOperand(0);
14063   SDValue N1 = N->getOperand(1);
14064   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14065   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14066   EVT VT = N->getValueType(0);
14067   SDLoc DL(N);
14068   const TargetOptions &Options = DAG.getTarget().Options;
14069   SDNodeFlags Flags = N->getFlags();
14070   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14071 
14072   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14073     return R;
14074 
14075   // fold vector ops
14076   if (VT.isVector())
14077     if (SDValue FoldedVOp = SimplifyVBinOp(N))
14078       return FoldedVOp;
14079 
14080   // fold (fdiv c1, c2) -> c1/c2
14081   if (N0CFP && N1CFP)
14082     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
14083 
14084   if (SDValue NewSel = foldBinOpIntoSelect(N))
14085     return NewSel;
14086 
14087   if (SDValue V = combineRepeatedFPDivisors(N))
14088     return V;
14089 
14090   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
14091     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
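    // E.g. (fdiv X, 2.0) becomes (fmul X, 0.5); 1.0 / 2.0 is exact, so the
    // status below is opOK.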
14092     if (N1CFP) {
14093       // Compute the reciprocal 1.0 / c2.
14094       const APFloat &N1APF = N1CFP->getValueAPF();
14095       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
14096       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
14097       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g. NaN, denormal, ...).
14099       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
14100           (!LegalOperations ||
14101            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
14102            // backend)... we should handle this gracefully after Legalize.
14103            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
14104            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
14105            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
14106         return DAG.getNode(ISD::FMUL, DL, VT, N0,
14107                            DAG.getConstantFP(Recip, DL, VT));
14108     }
14109 
14110     // If this FDIV is part of a reciprocal square root, it may be folded
14111     // into a target-specific square root estimate instruction.
14112     if (N1.getOpcode() == ISD::FSQRT) {
14113       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
14114         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14115     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
14116                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14117       if (SDValue RV =
14118               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14119         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
14120         AddToWorklist(RV.getNode());
14121         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14122       }
14123     } else if (N1.getOpcode() == ISD::FP_ROUND &&
14124                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14125       if (SDValue RV =
14126               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
14127         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
14128         AddToWorklist(RV.getNode());
14129         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
14130       }
14131     } else if (N1.getOpcode() == ISD::FMUL) {
14132       // Look through an FMUL. Even though this won't remove the FDIV directly,
14133       // it's still worthwhile to get rid of the FSQRT if possible.
14134       SDValue Sqrt, Y;
14135       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
14136         Sqrt = N1.getOperand(0);
14137         Y = N1.getOperand(1);
14138       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
14139         Sqrt = N1.getOperand(1);
14140         Y = N1.getOperand(0);
14141       }
14142       if (Sqrt.getNode()) {
14143         // If the other multiply operand is known positive, pull it into the
14144         // sqrt. That will eliminate the division if we convert to an estimate.
14145         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
14146             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
14147           SDValue A;
14148           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
14149             A = Y.getOperand(0);
14150           else if (Y == Sqrt.getOperand(0))
14151             A = Y;
14152           if (A) {
14153             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
14154             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
14155             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
14156             SDValue AAZ =
14157                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
14158             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
14159               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
14160 
14161             // Estimate creation failed. Clean up speculatively created nodes.
14162             recursivelyDeleteUnusedNodes(AAZ.getNode());
14163           }
14164         }
14165 
        // We found an FSQRT, so try to make this fold:
14167         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
14168         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
14169           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
14170           AddToWorklist(Div.getNode());
14171           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
14172         }
14173       }
14174     }
14175 
14176     // Fold into a reciprocal estimate and multiply instead of a real divide.
14177     if (Options.NoInfsFPMath || Flags.hasNoInfs())
14178       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
14179         return RV;
14180   }
14181 
14182   // Fold X/Sqrt(X) -> Sqrt(X)
14183   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
14184       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
14185     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
14186       return N1;
14187 
14188   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
14189   TargetLowering::NegatibleCost CostN0 =
14190       TargetLowering::NegatibleCost::Expensive;
14191   TargetLowering::NegatibleCost CostN1 =
14192       TargetLowering::NegatibleCost::Expensive;
14193   SDValue NegN0 =
14194       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
14195   SDValue NegN1 =
14196       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
14197   if (NegN0 && NegN1 &&
14198       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
14199        CostN1 == TargetLowering::NegatibleCost::Cheaper))
14200     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
14201 
14202   return SDValue();
14203 }
14204 
14205 SDValue DAGCombiner::visitFREM(SDNode *N) {
14206   SDValue N0 = N->getOperand(0);
14207   SDValue N1 = N->getOperand(1);
14208   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14209   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
14210   EVT VT = N->getValueType(0);
14211   SDNodeFlags Flags = N->getFlags();
14212   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14213 
14214   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
14215     return R;
14216 
14217   // fold (frem c1, c2) -> fmod(c1,c2)
14218   if (N0CFP && N1CFP)
14219     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
14220 
14221   if (SDValue NewSel = foldBinOpIntoSelect(N))
14222     return NewSel;
14223 
14224   return SDValue();
14225 }
14226 
14227 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
14228   SDNodeFlags Flags = N->getFlags();
14229   const TargetOptions &Options = DAG.getTarget().Options;
14230 
14231   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
14232   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
14233   if (!Flags.hasApproximateFuncs() ||
14234       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
14235     return SDValue();
14236 
14237   SDValue N0 = N->getOperand(0);
14238   if (TLI.isFsqrtCheap(N0, DAG))
14239     return SDValue();
14240 
14241   // FSQRT nodes have flags that propagate to the created nodes.
14242   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
14243   //       transform the fdiv, we may produce a sub-optimal estimate sequence
14244   //       because the reciprocal calculation may not have to filter out a
14245   //       0.0 input.
14246   return buildSqrtEstimate(N0, Flags);
14247 }
14248 
14249 /// copysign(x, fp_extend(y)) -> copysign(x, y)
14250 /// copysign(x, fp_round(y)) -> copysign(x, y)
14251 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
14252   SDValue N1 = N->getOperand(1);
14253   if ((N1.getOpcode() == ISD::FP_EXTEND ||
14254        N1.getOpcode() == ISD::FP_ROUND)) {
14255     EVT N1VT = N1->getValueType(0);
14256     EVT N1Op0VT = N1->getOperand(0).getValueType();
14257 
14258     // Always fold no-op FP casts.
14259     if (N1VT == N1Op0VT)
14260       return true;
14261 
14262     // Do not optimize out type conversion of f128 type yet.
14263     // For some targets like x86_64, configuration is changed to keep one f128
14264     // value in one SSE register, but instruction selection cannot handle
14265     // FCOPYSIGN on SSE registers yet.
14266     if (N1Op0VT == MVT::f128)
14267       return false;
14268 
14269     // Avoid mismatched vector operand types, for better instruction selection.
14270     if (N1Op0VT.isVector())
14271       return false;
14272 
14273     return true;
14274   }
14275   return false;
14276 }
14277 
14278 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
14279   SDValue N0 = N->getOperand(0);
14280   SDValue N1 = N->getOperand(1);
14281   bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
14282   bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
14283   EVT VT = N->getValueType(0);
14284 
14285   if (N0CFP && N1CFP) // Constant fold
14286     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
14287 
14288   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
14289     const APFloat &V = N1C->getValueAPF();
14290     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
14291     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
14292     if (!V.isNegative()) {
14293       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
14294         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14295     } else {
14296       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
14297         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
14298                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
14299     }
14300   }
14301 
14302   // copysign(fabs(x), y) -> copysign(x, y)
14303   // copysign(fneg(x), y) -> copysign(x, y)
14304   // copysign(copysign(x,z), y) -> copysign(x, y)
14305   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
14306       N0.getOpcode() == ISD::FCOPYSIGN)
14307     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
14308 
14309   // copysign(x, abs(y)) -> abs(x)
14310   if (N1.getOpcode() == ISD::FABS)
14311     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14312 
14313   // copysign(x, copysign(y,z)) -> copysign(x, z)
14314   if (N1.getOpcode() == ISD::FCOPYSIGN)
14315     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
14316 
14317   // copysign(x, fp_extend(y)) -> copysign(x, y)
14318   // copysign(x, fp_round(y)) -> copysign(x, y)
14319   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
14320     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
14321 
14322   return SDValue();
14323 }
14324 
14325 SDValue DAGCombiner::visitFPOW(SDNode *N) {
14326   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
14327   if (!ExponentC)
14328     return SDValue();
14329   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14330 
14331   // Try to convert x ** (1/3) into cube root.
14332   // TODO: Handle the various flavors of long double.
14333   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14334   //       Some range near 1/3 should be fine.
14335   EVT VT = N->getValueType(0);
14336   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14337       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14338     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14339     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) is a negative number.
14341     // For regular numbers, rounding may cause the results to differ.
14342     // Therefore, we require { nsz ninf nnan afn } for this transform.
14343     // TODO: We could select out the special cases if we don't have nsz/ninf.
14344     SDNodeFlags Flags = N->getFlags();
14345     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14346         !Flags.hasApproximateFuncs())
14347       return SDValue();
14348 
14349     // Do not create a cbrt() libcall if the target does not have it, and do not
14350     // turn a pow that has lowering support into a cbrt() libcall.
14351     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14352         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14353          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14354       return SDValue();
14355 
14356     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14357   }
14358 
14359   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14360   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14361   // TODO: This could be extended (using a target hook) to handle smaller
14362   // power-of-2 fractional exponents.
14363   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14364   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14365   if (ExponentIs025 || ExponentIs075) {
14366     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14367     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
14368     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14369     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
14370     // For regular numbers, rounding may cause the results to differ.
14371     // Therefore, we require { nsz ninf afn } for this transform.
14372     // TODO: We could select out the special cases if we don't have nsz/ninf.
14373     SDNodeFlags Flags = N->getFlags();
14374 
14375     // We only need no signed zeros for the 0.25 case.
14376     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14377         !Flags.hasApproximateFuncs())
14378       return SDValue();
14379 
14380     // Don't double the number of libcalls. We are trying to inline fast code.
14381     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14382       return SDValue();
14383 
14384     // Assume that libcalls are the smallest code.
14385     // TODO: This restriction should probably be lifted for vectors.
14386     if (ForCodeSize)
14387       return SDValue();
14388 
14389     // pow(X, 0.25) --> sqrt(sqrt(X))
14390     SDLoc DL(N);
14391     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14392     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14393     if (ExponentIs025)
14394       return SqrtSqrt;
14395     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
14396     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14397   }
14398 
14399   return SDValue();
14400 }
14401 
14402 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14403                                const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may
  // produce unexpected results. I.e., programs may be relying on the
  // platform-specific undefined behavior when the float-to-int conversion
  // overflows.
14407   const Function &F = DAG.getMachineFunction().getFunction();
14408   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14409   if (StrictOverflow.getValueAsString().equals("false"))
14410     return SDValue();
14411 
14412   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14413   // replacing casts with a libcall. We also must be allowed to ignore -0.0
14414   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14415   // conversions would return +0.0.
14416   // FIXME: We should be able to use node-level FMF here.
14417   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14418   EVT VT = N->getValueType(0);
14419   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14420       !DAG.getTarget().Options.NoSignedZerosFPMath)
14421     return SDValue();
14422 
14423   // fptosi/fptoui round towards zero, so converting from FP to integer and
14424   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
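  // For example, sitofp(fptosi(-3.7)) = -3.0 = ftrunc(-3.7).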
14425   SDValue N0 = N->getOperand(0);
14426   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14427       N0.getOperand(0).getValueType() == VT)
14428     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14429 
14430   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14431       N0.getOperand(0).getValueType() == VT)
14432     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14433 
14434   return SDValue();
14435 }
14436 
14437 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14438   SDValue N0 = N->getOperand(0);
14439   EVT VT = N->getValueType(0);
14440   EVT OpVT = N0.getValueType();
14441 
14442   // [us]itofp(undef) = 0, because the result value is bounded.
14443   if (N0.isUndef())
14444     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14445 
14446   // fold (sint_to_fp c1) -> c1fp
14447   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14448       // ...but only if the target supports immediate floating-point values
14449       (!LegalOperations ||
14450        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14451     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14452 
14453   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14454   // but UINT_TO_FP is legal on this target, try to convert.
14455   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14456       hasOperation(ISD::UINT_TO_FP, OpVT)) {
14457     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14458     if (DAG.SignBitIsZero(N0))
14459       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14460   }
14461 
14462   // The next optimizations are desirable only if SELECT_CC can be lowered.
14463   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14464   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14465       !VT.isVector() &&
14466       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14467     SDLoc DL(N);
14468     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14469                          DAG.getConstantFP(0.0, DL, VT));
14470   }
14471 
14472   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14473   //      (select (setcc x, y, cc), 1.0, 0.0)
14474   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14475       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14476       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14477     SDLoc DL(N);
14478     return DAG.getSelect(DL, VT, N0.getOperand(0),
14479                          DAG.getConstantFP(1.0, DL, VT),
14480                          DAG.getConstantFP(0.0, DL, VT));
14481   }
14482 
14483   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14484     return FTrunc;
14485 
14486   return SDValue();
14487 }
14488 
14489 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14490   SDValue N0 = N->getOperand(0);
14491   EVT VT = N->getValueType(0);
14492   EVT OpVT = N0.getValueType();
14493 
14494   // [us]itofp(undef) = 0, because the result value is bounded.
14495   if (N0.isUndef())
14496     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14497 
14498   // fold (uint_to_fp c1) -> c1fp
14499   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14500       // ...but only if the target supports immediate floating-point values
14501       (!LegalOperations ||
14502        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14503     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14504 
14505   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14506   // but SINT_TO_FP is legal on this target, try to convert.
14507   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14508       hasOperation(ISD::SINT_TO_FP, OpVT)) {
14509     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14510     if (DAG.SignBitIsZero(N0))
14511       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14512   }
14513 
14514   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14515   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14516       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14517     SDLoc DL(N);
14518     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14519                          DAG.getConstantFP(0.0, DL, VT));
14520   }
14521 
14522   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14523     return FTrunc;
14524 
14525   return SDValue();
14526 }
14527 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14529 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14530   SDValue N0 = N->getOperand(0);
14531   EVT VT = N->getValueType(0);
14532 
14533   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14534     return SDValue();
14535 
14536   SDValue Src = N0.getOperand(0);
14537   EVT SrcVT = Src.getValueType();
14538   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14539   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14540 
14541   // We can safely assume the conversion won't overflow the output range,
14542   // because (for example) (uint8_t)18293.f is undefined behavior.
14543 
14544   // Since we can assume the conversion won't overflow, our decision as to
14545   // whether the input will fit in the float should depend on the minimum
14546   // of the input range and output range.
14547 
14548   // This means this is also safe for a signed input and unsigned output, since
14549   // a negative input would lead to undefined behavior.
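  // For example (IEEE f32 has a 24-bit significand): every signed i16 value
  // survives the round trip through f32 exactly, so the conversion pair can
  // be folded, while a full-range i32 does not fit and the fold is rejected.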
14550   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14551   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14552   unsigned ActualSize = std::min(InputSize, OutputSize);
14553   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14554 
14555   // We can only fold away the float conversion if the input range can be
14556   // represented exactly in the float range.
14557   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14558     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14559       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14560                                                        : ISD::ZERO_EXTEND;
14561       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14562     }
14563     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14564       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14565     return DAG.getBitcast(VT, Src);
14566   }
14567   return SDValue();
14568 }
14569 
14570 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14571   SDValue N0 = N->getOperand(0);
14572   EVT VT = N->getValueType(0);
14573 
14574   // fold (fp_to_sint undef) -> undef
14575   if (N0.isUndef())
14576     return DAG.getUNDEF(VT);
14577 
14578   // fold (fp_to_sint c1fp) -> c1
14579   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14580     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14581 
14582   return FoldIntToFPToInt(N, DAG);
14583 }
14584 
14585 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14586   SDValue N0 = N->getOperand(0);
14587   EVT VT = N->getValueType(0);
14588 
14589   // fold (fp_to_uint undef) -> undef
14590   if (N0.isUndef())
14591     return DAG.getUNDEF(VT);
14592 
14593   // fold (fp_to_uint c1fp) -> c1
14594   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14595     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14596 
14597   return FoldIntToFPToInt(N, DAG);
14598 }
14599 
14600 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14601   SDValue N0 = N->getOperand(0);
14602   SDValue N1 = N->getOperand(1);
14603   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14604   EVT VT = N->getValueType(0);
14605 
14606   // fold (fp_round c1fp) -> c1fp
14607   if (N0CFP)
14608     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14609 
14610   // fold (fp_round (fp_extend x)) -> x
14611   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14612     return N0.getOperand(0);
14613 
14614   // fold (fp_round (fp_round x)) -> (fp_round x)
14615   if (N0.getOpcode() == ISD::FP_ROUND) {
14616     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14617     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14618 
14619     // Skip this folding if it results in an fp_round from f80 to f16.
14620     //
14621     // f80 to f16 always generates an expensive (and as yet, unimplemented)
14622     // libcall to __truncxfhf2 instead of selecting native f16 conversion
14623     // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP on platforms
    // like x86.
14626     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14627       return SDValue();
14628 
    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as a single rounding.
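    // For instance (illustrative), a first rounding f64->f32 may land exactly
    // halfway between two adjacent f16 values, so the second rounding
    // f32->f16 breaks a tie that a direct f64->f16 rounding would never see.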
14633     // Also, this is a value preserving truncation iff both fp_round's are.
14634     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14635       SDLoc DL(N);
14636       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14637                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14638     }
14639   }
14640 
14641   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14642   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14643     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14644                               N0.getOperand(0), N1);
14645     AddToWorklist(Tmp.getNode());
14646     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14647                        Tmp, N0.getOperand(1));
14648   }
14649 
14650   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14651     return NewVSel;
14652 
14653   return SDValue();
14654 }
14655 
14656 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14657   SDValue N0 = N->getOperand(0);
14658   EVT VT = N->getValueType(0);
14659 
  // If this fp_extend's only use is an fp_round, don't fold it here; let the
  // fp_round combine fold the pair instead.
14661   if (N->hasOneUse() &&
14662       N->use_begin()->getOpcode() == ISD::FP_ROUND)
14663     return SDValue();
14664 
14665   // fold (fp_extend c1fp) -> c1fp
14666   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14667     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14668 
14669   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14670   if (N0.getOpcode() == ISD::FP16_TO_FP &&
14671       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14672     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14673 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
14676   if (N0.getOpcode() == ISD::FP_ROUND
14677       && N0.getConstantOperandVal(1) == 1) {
14678     SDValue In = N0.getOperand(0);
14679     if (In.getValueType() == VT) return In;
14680     if (VT.bitsLT(In.getValueType()))
14681       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14682                          In, N0.getOperand(1));
14683     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14684   }
14685 
14686   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14687   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14688        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14689     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14690     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14691                                      LN0->getChain(),
14692                                      LN0->getBasePtr(), N0.getValueType(),
14693                                      LN0->getMemOperand());
14694     CombineTo(N, ExtLoad);
14695     CombineTo(N0.getNode(),
14696               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14697                           N0.getValueType(), ExtLoad,
14698                           DAG.getIntPtrConstant(1, SDLoc(N0))),
14699               ExtLoad.getValue(1));
14700     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14701   }
14702 
14703   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14704     return NewVSel;
14705 
14706   return SDValue();
14707 }
14708 
14709 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14710   SDValue N0 = N->getOperand(0);
14711   EVT VT = N->getValueType(0);
14712 
14713   // fold (fceil c1) -> fceil(c1)
14714   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14715     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14716 
14717   return SDValue();
14718 }
14719 
14720 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14721   SDValue N0 = N->getOperand(0);
14722   EVT VT = N->getValueType(0);
14723 
14724   // fold (ftrunc c1) -> ftrunc(c1)
14725   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14726     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14727 
14728   // fold ftrunc (known rounded int x) -> x
  // ftrunc is part of the fptosi/fptoui expansion on some targets, so this
  // pattern is likely to be generated when extracting an integer from an
  // already-rounded floating-point value.
14731   switch (N0.getOpcode()) {
14732   default: break;
14733   case ISD::FRINT:
14734   case ISD::FTRUNC:
14735   case ISD::FNEARBYINT:
14736   case ISD::FFLOOR:
14737   case ISD::FCEIL:
14738     return N0;
14739   }
14740 
14741   return SDValue();
14742 }
14743 
14744 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14745   SDValue N0 = N->getOperand(0);
14746   EVT VT = N->getValueType(0);
14747 
14748   // fold (ffloor c1) -> ffloor(c1)
14749   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14750     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14751 
14752   return SDValue();
14753 }
14754 
14755 SDValue DAGCombiner::visitFNEG(SDNode *N) {
14756   SDValue N0 = N->getOperand(0);
14757   EVT VT = N->getValueType(0);
14758   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14759 
14760   // Constant fold FNEG.
14761   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14762     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14763 
14764   if (SDValue NegN0 =
14765           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14766     return NegN0;
14767 
14768   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14769   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14770   // know it was called from a context with a nsz flag if the input fsub does
14771   // not.
14772   if (N0.getOpcode() == ISD::FSUB &&
14773       (DAG.getTarget().Options.NoSignedZerosFPMath ||
14774        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14775     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14776                        N0.getOperand(0));
14777   }
14778 
14779   if (SDValue Cast = foldSignChangeInBitcast(N))
14780     return Cast;
14781 
14782   return SDValue();
14783 }
14784 
14785 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14786                             APFloat (*Op)(const APFloat &, const APFloat &)) {
14787   SDValue N0 = N->getOperand(0);
14788   SDValue N1 = N->getOperand(1);
14789   EVT VT = N->getValueType(0);
14790   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14791   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14792   const SDNodeFlags Flags = N->getFlags();
14793   unsigned Opc = N->getOpcode();
14794   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14795   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14796   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14797 
14798   if (N0CFP && N1CFP) {
14799     const APFloat &C0 = N0CFP->getValueAPF();
14800     const APFloat &C1 = N1CFP->getValueAPF();
14801     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14802   }
14803 
14804   // Canonicalize to constant on RHS.
14805   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
14806       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
14807     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14808 
14809   if (N1CFP) {
14810     const APFloat &AF = N1CFP->getValueAPF();
14811 
14812     // minnum(X, nan) -> X
14813     // maxnum(X, nan) -> X
14814     // minimum(X, nan) -> nan
14815     // maximum(X, nan) -> nan
14816     if (AF.isNaN())
14817       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
14818 
14819     // In the following folds, inf can be replaced with the largest finite
14820     // float, if the ninf flag is set.
14821     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
14822       // minnum(X, -inf) -> -inf
14823       // maxnum(X, +inf) -> +inf
14824       // minimum(X, -inf) -> -inf if nnan
14825       // maximum(X, +inf) -> +inf if nnan
14826       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
14827         return N->getOperand(1);
14828 
14829       // minnum(X, +inf) -> X if nnan
14830       // maxnum(X, -inf) -> X if nnan
14831       // minimum(X, +inf) -> X
14832       // maximum(X, -inf) -> X
14833       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
14834         return N->getOperand(0);
14835     }
14836   }
14837 
14838   return SDValue();
14839 }
14840 
14841 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
14842   return visitFMinMax(DAG, N, minnum);
14843 }
14844 
14845 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
14846   return visitFMinMax(DAG, N, maxnum);
14847 }
14848 
14849 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
14850   return visitFMinMax(DAG, N, minimum);
14851 }
14852 
14853 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
14854   return visitFMinMax(DAG, N, maximum);
14855 }
14856 
14857 SDValue DAGCombiner::visitFABS(SDNode *N) {
14858   SDValue N0 = N->getOperand(0);
14859   EVT VT = N->getValueType(0);
14860 
14861   // fold (fabs c1) -> fabs(c1)
14862   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
14863     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14864 
14865   // fold (fabs (fabs x)) -> (fabs x)
14866   if (N0.getOpcode() == ISD::FABS)
14867     return N->getOperand(0);
14868 
14869   // fold (fabs (fneg x)) -> (fabs x)
14870   // fold (fabs (fcopysign x, y)) -> (fabs x)
14871   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
14872     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
14873 
14874   if (SDValue Cast = foldSignChangeInBitcast(N))
14875     return Cast;
14876 
14877   return SDValue();
14878 }
14879 
14880 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14881   SDValue Chain = N->getOperand(0);
14882   SDValue N1 = N->getOperand(1);
14883   SDValue N2 = N->getOperand(2);
14884 
14885   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
14886   // nondeterministic jumps).
14887   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
14888     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
14889                        N1->getOperand(0), N2);
14890   }
14891 
14892   // If N is a constant we could fold this into a fallthrough or unconditional
14893   // branch. However that doesn't happen very often in normal code, because
14894   // Instcombine/SimplifyCFG should have handled the available opportunities.
14895   // If we did this folding here, it would be necessary to update the
14896   // MachineBasicBlock CFG, which is awkward.
14897 
14898   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14899   // on the target.
14900   if (N1.getOpcode() == ISD::SETCC &&
14901       TLI.isOperationLegalOrCustom(ISD::BR_CC,
14902                                    N1.getOperand(0).getValueType())) {
14903     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14904                        Chain, N1.getOperand(2),
14905                        N1.getOperand(0), N1.getOperand(1), N2);
14906   }
14907 
14908   if (N1.hasOneUse()) {
14909     // rebuildSetCC calls visitXor which may change the Chain when there is a
14910     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14911     HandleSDNode ChainHandle(Chain);
14912     if (SDValue NewN1 = rebuildSetCC(N1))
14913       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14914                          ChainHandle.getValue(), NewN1, N2);
14915   }
14916 
14917   return SDValue();
14918 }
14919 
14920 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14921   if (N.getOpcode() == ISD::SRL ||
14922       (N.getOpcode() == ISD::TRUNCATE &&
14923        (N.getOperand(0).hasOneUse() &&
14924         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
14926     if (N.getOpcode() == ISD::TRUNCATE)
14927       N = N.getOperand(0);
14928 
14929     // Match this pattern so that we can generate simpler code:
14930     //
14931     //   %a = ...
14932     //   %b = and i32 %a, 2
14933     //   %c = srl i32 %b, 1
14934     //   brcond i32 %c ...
14935     //
14936     // into
14937     //
14938     //   %a = ...
14939     //   %b = and i32 %a, 2
14940     //   %c = setcc eq %b, 0
14941     //   brcond %c ...
14942     //
14943     // This applies only when the AND constant value has one bit set and the
14944     // SRL constant is equal to the log2 of the AND constant. The back-end is
14945     // smart enough to convert the result into a TEST/JMP sequence.
14946     SDValue Op0 = N.getOperand(0);
14947     SDValue Op1 = N.getOperand(1);
14948 
14949     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14950       SDValue AndOp1 = Op0.getOperand(1);
14951 
14952       if (AndOp1.getOpcode() == ISD::Constant) {
14953         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14954 
14955         if (AndConst.isPowerOf2() &&
14956             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14957           SDLoc DL(N);
14958           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14959                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14960                               ISD::SETNE);
14961         }
14962       }
14963     }
14964   }
14965 
14966   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14967   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14968   if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
14974     HandleSDNode XORHandle(N);
14975     while (N.getOpcode() == ISD::XOR) {
14976       SDValue Tmp = visitXOR(N.getNode());
14977       // No simplification done.
14978       if (!Tmp.getNode())
14979         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
14982       if (Tmp.getNode() == N.getNode())
14983         N = XORHandle.getValue();
14984       else // Node simplified. Try simplifying again.
14985         N = Tmp;
14986     }
14987 
14988     if (N.getOpcode() != ISD::XOR)
14989       return N;
14990 
14991     SDValue Op0 = N->getOperand(0);
14992     SDValue Op1 = N->getOperand(1);
14993 
14994     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14995       bool Equal = false;
14996       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14997       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14998           Op0.getValueType() == MVT::i1) {
14999         N = Op0;
15000         Op0 = N->getOperand(0);
15001         Op1 = N->getOperand(1);
15002         Equal = true;
15003       }
15004 
15005       EVT SetCCVT = N.getValueType();
15006       if (LegalTypes)
15007         SetCCVT = getSetCCResultType(SetCCVT);
15008       // Replace the uses of XOR with SETCC
15009       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
15010                           Equal ? ISD::SETEQ : ISD::SETNE);
15011     }
15012   }
15013 
15014   return SDValue();
15015 }
15016 
15017 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
15018 //
15019 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
15020   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
15021   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
15022 
15023   // If N is a constant we could fold this into a fallthrough or unconditional
15024   // branch. However that doesn't happen very often in normal code, because
15025   // Instcombine/SimplifyCFG should have handled the available opportunities.
15026   // If we did this folding here, it would be necessary to update the
15027   // MachineBasicBlock CFG, which is awkward.
15028 
15029   // Use SimplifySetCC to simplify SETCC's.
15030   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
15031                                CondLHS, CondRHS, CC->get(), SDLoc(N),
15032                                false);
15033   if (Simp.getNode()) AddToWorklist(Simp.getNode());
15034 
15035   // fold to a simpler setcc
15036   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
15037     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
15038                        N->getOperand(0), Simp.getOperand(2),
15039                        Simp.getOperand(0), Simp.getOperand(1),
15040                        N->getOperand(4));
15041 
15042   return SDValue();
15043 }
15044 
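/// Classify \p N as a candidate (possibly masked) load or store for indexed
/// combining: reject nodes that are already indexed, check that the target
/// supports the \p Inc or \p Dec indexed mode for the memory VT, and return
/// the node's base pointer in \p Ptr.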
15045 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
15046                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
15047                                      const TargetLowering &TLI) {
15048   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15049     if (LD->isIndexed())
15050       return false;
15051     EVT VT = LD->getMemoryVT();
15052     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
15053       return false;
15054     Ptr = LD->getBasePtr();
15055   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15056     if (ST->isIndexed())
15057       return false;
15058     EVT VT = ST->getMemoryVT();
15059     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
15060       return false;
15061     Ptr = ST->getBasePtr();
15062     IsLoad = false;
15063   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
15064     if (LD->isIndexed())
15065       return false;
15066     EVT VT = LD->getMemoryVT();
15067     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
15068         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
15069       return false;
15070     Ptr = LD->getBasePtr();
15071     IsMasked = true;
15072   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
15073     if (ST->isIndexed())
15074       return false;
15075     EVT VT = ST->getMemoryVT();
15076     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
15077         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
15078       return false;
15079     Ptr = ST->getBasePtr();
15080     IsLoad = false;
15081     IsMasked = true;
15082   } else {
15083     return false;
15084   }
15085   return true;
15086 }
15087 
15088 /// Try turning a load/store into a pre-indexed load/store when the base
15089 /// pointer is an add or subtract and it has other uses besides the load/store.
15090 /// After the transformation, the new indexed load/store has effectively folded
15091 /// the add/subtract in and all of its other uses are redirected to the
15092 /// new load/store.
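/// For example (illustrative):
///   t1 = add t0, 8
///   t2 = load t1
///   ... other uses of t1 ...
/// becomes a pre-indexed load that produces both the loaded value and the
/// updated address t0 + 8, with the remaining uses of t1 redirected to that
/// address result.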
15093 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
15094   if (Level < AfterLegalizeDAG)
15095     return false;
15096 
15097   bool IsLoad = true;
15098   bool IsMasked = false;
15099   SDValue Ptr;
15100   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
15101                                 Ptr, TLI))
15102     return false;
15103 
15104   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
15105   // out.  There is no reason to make this a preinc/predec.
15106   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
15107       Ptr.getNode()->hasOneUse())
15108     return false;
15109 
15110   // Ask the target to do addressing mode selection.
15111   SDValue BasePtr;
15112   SDValue Offset;
15113   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15114   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
15115     return false;
15116 
15117   // Backends without true r+i pre-indexed forms may need to pass a
15118   // constant base with a variable offset so that constant coercion
15119   // will work with the patterns in canonical form.
15120   bool Swapped = false;
15121   if (isa<ConstantSDNode>(BasePtr)) {
15122     std::swap(BasePtr, Offset);
15123     Swapped = true;
15124   }
15125 
  // Don't create an indexed load / store with zero offset.
15127   if (isNullConstant(Offset))
15128     return false;
15129 
15130   // Try turning it into a pre-indexed load / store except when:
15131   // 1) The new base ptr is a frame index.
15132   // 2) If N is a store and the new base ptr is either the same as or is a
15133   //    predecessor of the value being stored.
15134   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
15135   //    that would create a cycle.
15136   // 4) All uses are load / store ops that use it as old base ptr.
15137 
15138   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
15139   // (plus the implicit offset) to a register to preinc anyway.
15140   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15141     return false;
15142 
15143   // Check #2.
15144   if (!IsLoad) {
15145     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
15146                            : cast<StoreSDNode>(N)->getValue();
15147 
15148     // Would require a copy.
15149     if (Val == BasePtr)
15150       return false;
15151 
15152     // Would create a cycle.
15153     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
15154       return false;
15155   }
15156 
15157   // Caches for hasPredecessorHelper.
15158   SmallPtrSet<const SDNode *, 32> Visited;
15159   SmallVector<const SDNode *, 16> Worklist;
15160   Worklist.push_back(N);
15161 
15162   // If the offset is a constant, there may be other adds of constants that
15163   // can be folded with this one. We should do this to avoid having to keep
15164   // a copy of the original base pointer.
15165   SmallVector<SDNode *, 16> OtherUses;
15166   if (isa<ConstantSDNode>(Offset))
15167     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
15168                               UE = BasePtr.getNode()->use_end();
15169          UI != UE; ++UI) {
15170       SDUse &Use = UI.getUse();
15171       // Skip the use that is Ptr and uses of other results from BasePtr's
15172       // node (important for nodes that return multiple results).
15173       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
15174         continue;
15175 
15176       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
15177         continue;
15178 
15179       if (Use.getUser()->getOpcode() != ISD::ADD &&
15180           Use.getUser()->getOpcode() != ISD::SUB) {
15181         OtherUses.clear();
15182         break;
15183       }
15184 
15185       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
15186       if (!isa<ConstantSDNode>(Op1)) {
15187         OtherUses.clear();
15188         break;
15189       }
15190 
15191       // FIXME: In some cases, we can be smarter about this.
15192       if (Op1.getValueType() != Offset.getValueType()) {
15193         OtherUses.clear();
15194         break;
15195       }
15196 
15197       OtherUses.push_back(Use.getUser());
15198     }
15199 
15200   if (Swapped)
15201     std::swap(BasePtr, Offset);
15202 
15203   // Now check for #3 and #4.
15204   bool RealUse = false;
15205 
15206   for (SDNode *Use : Ptr.getNode()->uses()) {
15207     if (Use == N)
15208       continue;
15209     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
15210       return false;
15211 
    // If Ptr can be folded into the addressing mode of another use, that use
    // does not count as a real use; the transformation is only profitable
    // when at least one use cannot be folded.
15214     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
15215       RealUse = true;
15216   }
15217 
15218   if (!RealUse)
15219     return false;
15220 
15221   SDValue Result;
15222   if (!IsMasked) {
15223     if (IsLoad)
15224       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15225     else
15226       Result =
15227           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
15228   } else {
15229     if (IsLoad)
15230       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15231                                         Offset, AM);
15232     else
15233       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
15234                                          Offset, AM);
15235   }
15236   ++PreIndexedNodes;
15237   ++NodesCombined;
15238   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
15239              Result.getNode()->dump(&DAG); dbgs() << '\n');
15240   WorklistRemover DeadNodes(*this);
15241   if (IsLoad) {
15242     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15243     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15244   } else {
15245     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15246   }
15247 
15248   // Finally, since the node is now dead, remove it from the graph.
15249   deleteAndRecombine(N);
15250 
15251   if (Swapped)
15252     std::swap(BasePtr, Offset);
15253 
15254   // Replace other uses of BasePtr that can be updated to use Ptr
15255   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
15256     unsigned OffsetIdx = 1;
15257     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
15258       OffsetIdx = 0;
15259     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
15260            BasePtr.getNode() && "Expected BasePtr operand");
15261 
15262     // We need to replace ptr0 in the following expression:
15263     //   x0 * offset0 + y0 * ptr0 = t0
15264     // knowing that
15265     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
15266     //
15267     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
15268     // indexed load/store and the expression that needs to be re-written.
15269     //
15270     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
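    //
    // For example (illustrative values): if the indexed store computed
    // ptr0 + 4 (x1 = y1 = 1, offset1 = 4) and this use computes ptr0 + 16
    // (x0 = y0 = 1, offset0 = 16), then t0 = (16 - 4) + t1 = t1 + 12.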
15272 
15273     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
15274     const APInt &Offset0 = CN->getAPIntValue();
15275     const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
15276     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
15277     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
15278     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
15279     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
15280 
15281     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
15282 
15283     APInt CNV = Offset0;
15284     if (X0 < 0) CNV = -CNV;
15285     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
15286     else CNV = CNV - Offset1;
15287 
15288     SDLoc DL(OtherUses[i]);
15289 
15290     // We can now generate the new expression.
15291     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
15292     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
15293 
15294     SDValue NewUse = DAG.getNode(Opcode,
15295                                  DL,
15296                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
15297     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
15298     deleteAndRecombine(OtherUses[i]);
15299   }
15300 
15301   // Replace the uses of Ptr with uses of the updated base value.
15302   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
15303   deleteAndRecombine(Ptr.getNode());
15304   AddToWorklist(Result.getNode());
15305 
15306   return true;
15307 }
15308 
15309 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
15310                                    SDValue &BasePtr, SDValue &Offset,
15311                                    ISD::MemIndexedMode &AM,
15312                                    SelectionDAG &DAG,
15313                                    const TargetLowering &TLI) {
15314   if (PtrUse == N ||
15315       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
15316     return false;
15317 
15318   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
15319     return false;
15320 
  // Don't create an indexed load / store with zero offset.
15322   if (isNullConstant(Offset))
15323     return false;
15324 
15325   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
15326     return false;
15327 
15328   SmallPtrSet<const SDNode *, 32> Visited;
15329   for (SDNode *Use : BasePtr.getNode()->uses()) {
15330     if (Use == Ptr.getNode())
15331       continue;
15332 
    // Bail out if there's a later user that could perform the indexing
    // instead.
15334     if (isa<MemSDNode>(Use)) {
15335       bool IsLoad = true;
15336       bool IsMasked = false;
15337       SDValue OtherPtr;
15338       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15339                                    IsMasked, OtherPtr, TLI)) {
15340         SmallVector<const SDNode *, 2> Worklist;
15341         Worklist.push_back(Use);
15342         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15343           return false;
15344       }
15345     }
15346 
15347     // If all the uses are load / store addresses, then don't do the
15348     // transformation.
15349     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15350       for (SDNode *UseUse : Use->uses())
15351         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15352           return false;
15353     }
15354   }
15355   return true;
15356 }
15357 
15358 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15359                                          bool &IsMasked, SDValue &Ptr,
15360                                          SDValue &BasePtr, SDValue &Offset,
15361                                          ISD::MemIndexedMode &AM,
15362                                          SelectionDAG &DAG,
15363                                          const TargetLowering &TLI) {
15364   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15365                                 IsMasked, Ptr, TLI) ||
15366       Ptr.getNode()->hasOneUse())
15367     return nullptr;
15368 
15369   // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
15372   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15373   //    nor a successor of N. Otherwise, if Op is folded that would
15374   //    create a cycle.
15375   for (SDNode *Op : Ptr->uses()) {
15376     // Check for #1.
15377     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15378       continue;
15379 
15380     // Check for #2.
15381     SmallPtrSet<const SDNode *, 32> Visited;
15382     SmallVector<const SDNode *, 8> Worklist;
15383     // Ptr is predecessor to both N and Op.
15384     Visited.insert(Ptr.getNode());
15385     Worklist.push_back(N);
15386     Worklist.push_back(Op);
15387     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15388         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15389       return Op;
15390   }
15391   return nullptr;
15392 }
15393 
/// Try to combine a load/store with an add/sub of the base pointer node into
/// a post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all uses of the add/sub
/// are redirected to the new load/store.
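/// For example (illustrative):
///   t1 = load t0
///   t2 = add t0, 4
/// becomes a post-indexed load that produces both the loaded value and the
/// incremented address t0 + 4, with uses of t2 redirected to that result.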
15398 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15399   if (Level < AfterLegalizeDAG)
15400     return false;
15401 
15402   bool IsLoad = true;
15403   bool IsMasked = false;
15404   SDValue Ptr;
15405   SDValue BasePtr;
15406   SDValue Offset;
15407   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15408   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15409                                          Offset, AM, DAG, TLI);
15410   if (!Op)
15411     return false;
15412 
15413   SDValue Result;
15414   if (!IsMasked)
15415     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15416                                          Offset, AM)
15417                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15418                                           BasePtr, Offset, AM);
15419   else
15420     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15421                                                BasePtr, Offset, AM)
15422                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15423                                                 BasePtr, Offset, AM);
15424   ++PostIndexedNodes;
15425   ++NodesCombined;
15426   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15427              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15428              dbgs() << '\n');
15429   WorklistRemover DeadNodes(*this);
15430   if (IsLoad) {
15431     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15432     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15433   } else {
15434     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15435   }
15436 
15437   // Finally, since the node is now dead, remove it from the graph.
15438   deleteAndRecombine(N);
15439 
  // Replace the uses of Op with uses of the updated base value.
15441   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15442                                 Result.getValue(IsLoad ? 1 : 0));
15443   deleteAndRecombine(Op);
15444   return true;
15445 }
15446 
15447 /// Return the base-pointer arithmetic from an indexed \p LD.
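/// E.g. (illustrative), for a pre/post-incremented load with base pointer BP
/// and increment Inc this returns (add BP, Inc); the decremented forms yield
/// (sub BP, Inc).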
15448 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15449   ISD::MemIndexedMode AM = LD->getAddressingMode();
15450   assert(AM != ISD::UNINDEXED);
15451   SDValue BP = LD->getOperand(1);
15452   SDValue Inc = LD->getOperand(2);
15453 
15454   // Some backends use TargetConstants for load offsets, but don't expect
15455   // TargetConstants in general ADD nodes. We can convert these constants into
15456   // regular Constants (if the constant is not opaque).
15457   assert((Inc.getOpcode() != ISD::TargetConstant ||
15458           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15459          "Cannot split out indexing using opaque target constants");
15460   if (Inc.getOpcode() == ISD::TargetConstant) {
15461     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15462     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15463                           ConstInc->getValueType(0));
15464   }
15465 
15466   unsigned Opc =
15467       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15468   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15469 }
15470 
15471 static inline ElementCount numVectorEltsOrZero(EVT T) {
15472   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15473 }
15474 
15475 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15476   Val = ST->getValue();
15477   EVT STType = Val.getValueType();
15478   EVT STMemType = ST->getMemoryVT();
15479   if (STType == STMemType)
15480     return true;
15481   if (isTypeLegal(STMemType))
15482     return false; // fail.
15483   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15484       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15485     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15486     return true;
15487   }
15488   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15489       STType.isInteger() && STMemType.isInteger()) {
15490     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15491     return true;
15492   }
15493   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15494     Val = DAG.getBitcast(STMemType, Val);
15495     return true;
15496   }
15497   return false; // fail.
15498 }
15499 
15500 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15501   EVT LDMemType = LD->getMemoryVT();
15502   EVT LDType = LD->getValueType(0);
15503   assert(Val.getValueType() == LDMemType &&
15504          "Attempting to extend value of non-matching type");
15505   if (LDType == LDMemType)
15506     return true;
15507   if (LDMemType.isInteger() && LDType.isInteger()) {
15508     switch (LD->getExtensionType()) {
15509     case ISD::NON_EXTLOAD:
15510       Val = DAG.getBitcast(LDType, Val);
15511       return true;
15512     case ISD::EXTLOAD:
15513       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15514       return true;
15515     case ISD::SEXTLOAD:
15516       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15517       return true;
15518     case ISD::ZEXTLOAD:
15519       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15520       return true;
15521     }
15522   }
15523   return false;
15524 }
15525 
15526 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15527   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15528     return SDValue();
15529   SDValue Chain = LD->getOperand(0);
15530   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15531   // TODO: Relax this restriction for unordered atomics (see D66309)
15532   if (!ST || !ST->isSimple())
15533     return SDValue();
15534 
15535   EVT LDType = LD->getValueType(0);
15536   EVT LDMemType = LD->getMemoryVT();
15537   EVT STMemType = ST->getMemoryVT();
15538   EVT STType = ST->getValue().getValueType();
15539 
15540   // There are two cases to consider here:
15541   //  1. The store is fixed width and the load is scalable. In this case we
15542   //     don't know at compile time if the store completely envelops the load
  //     so we abandon the optimization.
15544   //  2. The store is scalable and the load is fixed width. We could
15545   //     potentially support a limited number of cases here, but there has been
15546   //     no cost-benefit analysis to prove it's worth it.
15547   bool LdStScalable = LDMemType.isScalableVector();
15548   if (LdStScalable != STMemType.isScalableVector())
15549     return SDValue();
15550 
15551   // If we are dealing with scalable vectors on a big endian platform the
15552   // calculation of offsets below becomes trickier, since we do not know at
15553   // compile time the absolute size of the vector. Until we've done more
15554   // analysis on big-endian platforms it seems better to bail out for now.
15555   if (LdStScalable && DAG.getDataLayout().isBigEndian())
15556     return SDValue();
15557 
15558   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15559   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15560   int64_t Offset;
15561   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15562     return SDValue();
15563 
  // Normalize for endianness. After this, Offset == 0 denotes that the least
  // significant bit of the loaded value maps to the least significant bit of
  // the stored value. With Offset == n (for n > 0), the loaded value starts
  // at the n-th least significant byte of the stored value.
15568   if (DAG.getDataLayout().isBigEndian())
15569     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15570               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15571                  8 -
15572              Offset;
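  // E.g. (an illustrative sketch): for an 8-byte store and a 4-byte load with
  // a matched address offset of 0 on a big-endian target, the normalized
  // Offset is (64 - 32) / 8 - 0 = 4: the load reads the 4 most significant
  // bytes of the stored value, which start at its 4th least significant byte.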
15573 
  // Check that the stored value covers all bits that are loaded.
15575   bool STCoversLD;
15576 
15577   TypeSize LdMemSize = LDMemType.getSizeInBits();
15578   TypeSize StMemSize = STMemType.getSizeInBits();
15579   if (LdStScalable)
15580     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15581   else
15582     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15583                                    StMemSize.getFixedSize());
15584 
15585   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15586     if (LD->isIndexed()) {
15587       // Cannot handle opaque target constants and we must respect the user's
15588       // request not to split indexes from loads.
15589       if (!canSplitIdx(LD))
15590         return SDValue();
15591       SDValue Idx = SplitIndexingFromLoad(LD);
15592       SDValue Ops[] = {Val, Idx, Chain};
15593       return CombineTo(LD, Ops, 3);
15594     }
15595     return CombineTo(LD, Val, Chain);
15596   };
15597 
15598   if (!STCoversLD)
15599     return SDValue();
15600 
15601   // Memory as copy space (potentially masked).
15602   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15603     // Simple case: Direct non-truncating forwarding
15604     if (LDType.getSizeInBits() == LdMemSize)
15605       return ReplaceLd(LD, ST->getValue(), Chain);
15606     // Can we model the truncate and extension with an and mask?
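    // E.g. (illustrative): an i32 value stored via an i16 truncating store
    // and reloaded with an i16 zextload is equivalent to
    // (and StoredValue, 0xFFFF).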
15607     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15608         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15609       // Mask to size of LDMemType
15610       auto Mask =
15611           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15612                                                StMemSize.getFixedSize()),
15613                           SDLoc(ST), STType);
15614       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15615       return ReplaceLd(LD, Val, Chain);
15616     }
15617   }
15618 
15619   // TODO: Deal with nonzero offset.
15620   if (LD->getBasePtr().isUndef() || Offset != 0)
15621     return SDValue();
  // Model necessary truncations / extensions.
15623   SDValue Val;
15624   // Truncate Value To Stored Memory Size.
15625   do {
15626     if (!getTruncatedStoreValue(ST, Val))
15627       continue;
15628     if (!isTypeLegal(LDMemType))
15629       continue;
15630     if (STMemType != LDMemType) {
15631       // TODO: Support vectors? This requires extract_subvector/bitcast.
15632       if (!STMemType.isVector() && !LDMemType.isVector() &&
15633           STMemType.isInteger() && LDMemType.isInteger())
15634         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15635       else
15636         continue;
15637     }
15638     if (!extendLoadedValueToExtension(LD, Val))
15639       continue;
15640     return ReplaceLd(LD, Val, Chain);
15641   } while (false);
15642 
15643   // On failure, cleanup dead nodes we may have created.
15644   if (Val->use_empty())
15645     deleteAndRecombine(Val.getNode());
15646   return SDValue();
15647 }
15648 
15649 SDValue DAGCombiner::visitLOAD(SDNode *N) {
15650   LoadSDNode *LD  = cast<LoadSDNode>(N);
15651   SDValue Chain = LD->getChain();
15652   SDValue Ptr   = LD->getBasePtr();
15653 
15654   // If load is not volatile and there are no uses of the loaded value (and
15655   // the updated indexed value in case of indexed loads), change uses of the
15656   // chain value into uses of the chain input (i.e. delete the dead load).
15657   // TODO: Allow this for unordered atomics (see D66309)
15658   if (LD->isSimple()) {
15659     if (N->getValueType(1) == MVT::Other) {
15660       // Unindexed loads.
15661       if (!N->hasAnyUseOfValue(0)) {
15662         // It's not safe to use the two value CombineTo variant here. e.g.
15663         // v1, chain2 = load chain1, loc
15664         // v2, chain3 = load chain2, loc
15665         // v3         = add v2, c
15666         // Now we replace use of chain2 with chain1.  This makes the second load
15667         // isomorphic to the one we are deleting, and thus makes this load live.
15668         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15669                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15670                    dbgs() << "\n");
15671         WorklistRemover DeadNodes(*this);
15672         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15673         AddUsersToWorklist(Chain.getNode());
15674         if (N->use_empty())
15675           deleteAndRecombine(N);
15676 
15677         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15678       }
15679     } else {
15680       // Indexed loads.
15681       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15682 
15683       // If this load has an opaque TargetConstant offset, then we cannot split
15684       // the indexing into an add/sub directly (that TargetConstant may not be
15685       // valid for a different type of node, and we cannot convert an opaque
15686       // target constant into a regular constant).
15687       bool CanSplitIdx = canSplitIdx(LD);
15688 
15689       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15690         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15691         SDValue Index;
15692         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15693           Index = SplitIndexingFromLoad(LD);
15694           // Try to fold the base pointer arithmetic into subsequent loads and
15695           // stores.
15696           AddUsersToWorklist(N);
15697         } else
15698           Index = DAG.getUNDEF(N->getValueType(1));
15699         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15700                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15701                    dbgs() << " and 2 other values\n");
15702         WorklistRemover DeadNodes(*this);
15703         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15704         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15705         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15706         deleteAndRecombine(N);
15707         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15708       }
15709     }
15710   }
15711 
15712   // If this load is directly stored, replace the load value with the stored
15713   // value.
15714   if (auto V = ForwardStoreValueToDirectLoad(LD))
15715     return V;
15716 
15717   // Try to infer better alignment information than the load already has.
15718   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15719     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15720       if (*Alignment > LD->getAlign() &&
15721           isAligned(*Alignment, LD->getSrcValueOffset())) {
15722         SDValue NewLoad = DAG.getExtLoad(
15723             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15724             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15725             LD->getMemOperand()->getFlags(), LD->getAAInfo());
15726         // NewLoad will always be N as we are only refining the alignment
15727         assert(NewLoad.getNode() == N);
15728         (void)NewLoad;
15729       }
15730     }
15731   }
15732 
15733   if (LD->isUnindexed()) {
15734     // Walk up chain skipping non-aliasing memory nodes.
15735     SDValue BetterChain = FindBetterChain(LD, Chain);
15736 
15737     // If there is a better chain.
15738     if (Chain != BetterChain) {
15739       SDValue ReplLoad;
15740 
      // Replace the chain to avoid the dependency.
15742       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15743         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15744                                BetterChain, Ptr, LD->getMemOperand());
15745       } else {
15746         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15747                                   LD->getValueType(0),
15748                                   BetterChain, Ptr, LD->getMemoryVT(),
15749                                   LD->getMemOperand());
15750       }
15751 
15752       // Create token factor to keep old chain connected.
15753       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15754                                   MVT::Other, Chain, ReplLoad.getValue(1));
15755 
15756       // Replace uses with load result and token factor
15757       return CombineTo(N, ReplLoad.getValue(0), Token);
15758     }
15759   }
15760 
15761   // Try transforming N to an indexed load.
15762   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15763     return SDValue(N, 0);
15764 
15765   // Try to slice up N to more direct loads if the slices are mapped to
15766   // different register banks or pairing can take place.
15767   if (SliceUpLoad(N))
15768     return SDValue(N, 0);
15769 
15770   return SDValue();
15771 }
15772 
15773 namespace {
15774 
15775 /// Helper structure used to slice a load in smaller loads.
15776 /// Basically a slice is obtained from the following sequence:
15777 /// Origin = load Ty1, Base
15778 /// Shift = srl Ty1 Origin, CstTy Amount
15779 /// Inst = trunc Shift to Ty2
15780 ///
15781 /// Then, it will be rewritten into:
15782 /// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
15784 ///
15785 /// SliceTy is deduced from the number of bits that are actually used to
15786 /// build Inst.
15787 struct LoadedSlice {
15788   /// Helper structure used to compute the cost of a slice.
15789   struct Cost {
15790     /// Are we optimizing for code size.
15791     bool ForCodeSize = false;
15792 
    /// Various costs.
15794     unsigned Loads = 0;
15795     unsigned Truncates = 0;
15796     unsigned CrossRegisterBanksCopies = 0;
15797     unsigned ZExts = 0;
15798     unsigned Shift = 0;
15799 
15800     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15801 
15802     /// Get the cost of one isolated slice.
15803     Cost(const LoadedSlice &LS, bool ForCodeSize)
15804         : ForCodeSize(ForCodeSize), Loads(1) {
15805       EVT TruncType = LS.Inst->getValueType(0);
15806       EVT LoadedType = LS.getLoadedType();
15807       if (TruncType != LoadedType &&
15808           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15809         ZExts = 1;
15810     }
15811 
15812     /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows the cost of the original
    /// load to grow by the gain from this slice.
15816     void addSliceGain(const LoadedSlice &LS) {
15817       // Each slice saves a truncate.
15818       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
15819       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
15820                               LS.Inst->getValueType(0)))
15821         ++Truncates;
15822       // If there is a shift amount, this slice gets rid of it.
15823       if (LS.Shift)
15824         ++Shift;
15825       // If this slice can merge a cross register bank copy, account for it.
15826       if (LS.canMergeExpensiveCrossRegisterBankCopy())
15827         ++CrossRegisterBanksCopies;
15828     }
15829 
15830     Cost &operator+=(const Cost &RHS) {
15831       Loads += RHS.Loads;
15832       Truncates += RHS.Truncates;
15833       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15834       ZExts += RHS.ZExts;
15835       Shift += RHS.Shift;
15836       return *this;
15837     }
15838 
15839     bool operator==(const Cost &RHS) const {
15840       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15841              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15842              ZExts == RHS.ZExts && Shift == RHS.Shift;
15843     }
15844 
15845     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15846 
15847     bool operator<(const Cost &RHS) const {
15848       // Assume cross register banks copies are as expensive as loads.
15849       // FIXME: Do we want some more target hooks?
15850       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15851       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15852       // Unless we are optimizing for code size, consider the
15853       // expensive operation first.
15854       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15855         return ExpensiveOpsLHS < ExpensiveOpsRHS;
15856       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15857              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15858     }
15859 
15860     bool operator>(const Cost &RHS) const { return RHS < *this; }
15861 
15862     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15863 
15864     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15865   };
15866 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
15869   SDNode *Inst;
15870 
15871   // The original load instruction.
15872   LoadSDNode *Origin;
15873 
15874   // The right shift amount in bits from the original load.
15875   unsigned Shift;
15876 
  // The DAG from which Origin came.
15878   // This is used to get some contextual information about legal types, etc.
15879   SelectionDAG *DAG;
15880 
15881   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15882               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15883       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15884 
  /// Get the bits used out of the original loaded value.
  /// \return Result has the bit width of the original load, with used bits
  ///         set to 1 and unused bits set to 0.
15888   APInt getUsedBits() const {
15889     // Reproduce the trunc(lshr) sequence:
15890     // - Start from the truncated value.
15891     // - Zero extend to the desired bit width.
15892     // - Shift left.
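    // E.g. (illustrative): an i8 slice with Shift == 16 taken from an i32
    // load yields UsedBits == 0x00FF0000.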
15893     assert(Origin && "No original load to compare against.");
15894     unsigned BitWidth = Origin->getValueSizeInBits(0);
15895     assert(Inst && "This slice is not bound to an instruction");
15896     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15897            "Extracted slice is bigger than the whole type!");
15898     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15899     UsedBits.setAllBits();
15900     UsedBits = UsedBits.zext(BitWidth);
15901     UsedBits <<= Shift;
15902     return UsedBits;
15903   }
15904 
15905   /// Get the size of the slice to be loaded in bytes.
15906   unsigned getLoadedSize() const {
15907     unsigned SliceSize = getUsedBits().countPopulation();
15908     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15909     return SliceSize / 8;
15910   }
15911 
15912   /// Get the type that will be loaded for this slice.
15913   /// Note: This may not be the final type for the slice.
15914   EVT getLoadedType() const {
15915     assert(DAG && "Missing context");
15916     LLVMContext &Ctxt = *DAG->getContext();
15917     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15918   }
15919 
15920   /// Get the alignment of the load used for this slice.
15921   Align getAlign() const {
15922     Align Alignment = Origin->getAlign();
15923     uint64_t Offset = getOffsetFromBase();
15924     if (Offset != 0)
15925       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15926     return Alignment;
15927   }
15928 
15929   /// Check if this slice can be rewritten with legal operations.
15930   bool isLegal() const {
15931     // An invalid slice is not legal.
15932     if (!Origin || !Inst || !DAG)
15933       return false;
15934 
    // Offsets are for indexed loads only; we do not handle that.
15936     if (!Origin->getOffset().isUndef())
15937       return false;
15938 
15939     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15940 
15941     // Check that the type is legal.
15942     EVT SliceType = getLoadedType();
15943     if (!TLI.isTypeLegal(SliceType))
15944       return false;
15945 
15946     // Check that the load is legal for this type.
15947     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15948       return false;
15949 
15950     // Check that the offset can be computed.
15951     // 1. Check its type.
15952     EVT PtrType = Origin->getBasePtr().getValueType();
15953     if (PtrType == MVT::Untyped || PtrType.isExtended())
15954       return false;
15955 
15956     // 2. Check that it fits in the immediate.
15957     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15958       return false;
15959 
15960     // 3. Check that the computation is legal.
15961     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15962       return false;
15963 
15964     // Check that the zext is legal if it needs one.
15965     EVT TruncateType = Inst->getValueType(0);
15966     if (TruncateType != SliceType &&
15967         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15968       return false;
15969 
15970     return true;
15971   }
15972 
15973   /// Get the offset in bytes of this slice in the original chunk of
15974   /// bits.
15975   /// \pre DAG != nullptr.
15976   uint64_t getOffsetFromBase() const {
15977     assert(DAG && "Missing context.");
15978     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) &&
           "Shifts not aligned on byte boundaries are not supported.");
15980     uint64_t Offset = Shift / 8;
15981     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15982     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15983            "The size of the original loaded type is not a multiple of a"
15984            " byte.");
15985     // If Offset is bigger than TySizeInBytes, it means we are loading all
15986     // zeros. This should have been optimized before in the process.
15987     assert(TySizeInBytes > Offset &&
15988            "Invalid shift amount for given loaded size");
15989     if (IsBigEndian)
15990       Offset = TySizeInBytes - Offset - getLoadedSize();
15991     return Offset;
15992   }
15993 
15994   /// Generate the sequence of instructions to load the slice
15995   /// represented by this object and redirect the uses of this slice to
15996   /// this new sequence of instructions.
15997   /// \pre this->Inst && this->Origin are valid Instructions and this
15998   /// object passed the legal check: LoadedSlice::isLegal returned true.
15999   /// \return The last instruction of the sequence used to load the slice.
16000   SDValue loadSlice() const {
16001     assert(Inst && Origin && "Unable to replace a non-existing slice.");
16002     const SDValue &OldBaseAddr = Origin->getBasePtr();
16003     SDValue BaseAddr = OldBaseAddr;
16004     // Get the offset in that chunk of bytes w.r.t. the endianness.
16005     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
16006     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
16007     if (Offset) {
16008       // BaseAddr = BaseAddr + Offset.
16009       EVT ArithType = BaseAddr.getValueType();
16010       SDLoc DL(Origin);
16011       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
16012                               DAG->getConstant(Offset, DL, ArithType));
16013     }
16014 
16015     // Create the type of the loaded slice according to its size.
16016     EVT SliceType = getLoadedType();
16017 
16018     // Create the load for the slice.
16019     SDValue LastInst =
16020         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
16021                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
16022                      Origin->getMemOperand()->getFlags());
16023     // If the final type is not the same as the loaded type, this means that
16024     // we have to pad with zero. Create a zero extend for that.
16025     EVT FinalType = Inst->getValueType(0);
16026     if (SliceType != FinalType)
16027       LastInst =
16028           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
16029     return LastInst;
16030   }
16031 
16032   /// Check if this slice can be merged with an expensive cross register
16033   /// bank copy. E.g.,
16034   /// i = load i32
16035   /// f = bitcast i32 i to float
16036   bool canMergeExpensiveCrossRegisterBankCopy() const {
16037     if (!Inst || !Inst->hasOneUse())
16038       return false;
16039     SDNode *Use = *Inst->use_begin();
16040     if (Use->getOpcode() != ISD::BITCAST)
16041       return false;
16042     assert(DAG && "Missing context");
16043     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
16044     EVT ResVT = Use->getValueType(0);
16045     const TargetRegisterClass *ResRC =
16046         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
16047     const TargetRegisterClass *ArgRC =
16048         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
16049                            Use->getOperand(0)->isDivergent());
16050     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
16051       return false;
16052 
16053     // At this point, we know that we perform a cross-register-bank copy.
16054     // Check if it is expensive.
16055     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless the two register classes do not
    // explicitly share a common subclass.
16058     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
16059       return false;
16060 
16061     // Check if it will be merged with the load.
16062     // 1. Check the alignment constraint.
16063     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
16064         ResVT.getTypeForEVT(*DAG->getContext()));
16065 
16066     if (RequiredAlignment > getAlign())
16067       return false;
16068 
16069     // 2. Check that the load is a legal operation for that type.
16070     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
16071       return false;
16072 
16073     // 3. Check that we do not have a zext in the way.
16074     if (Inst->getValueType(0) != getLoadedType())
16075       return false;
16076 
16077     return true;
16078   }
16079 };
16080 
16081 } // end anonymous namespace
16082 
16083 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
16084 /// \p UsedBits looks like 0..0 1..1 0..0.
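/// E.g. (illustrative), 0x0FF0 is dense while 0x0F0F is not.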
16085 static bool areUsedBitsDense(const APInt &UsedBits) {
16086   // If all the bits are one, this is dense!
16087   if (UsedBits.isAllOnesValue())
16088     return true;
16089 
16090   // Get rid of the unused bits on the right.
16091   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
16092   // Get rid of the unused bits on the left.
16093   if (NarrowedUsedBits.countLeadingZeros())
16094     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
16095   // Check that the chunk of bits is completely used.
16096   return NarrowedUsedBits.isAllOnesValue();
16097 }
16098 
16099 /// Check whether or not \p First and \p Second are next to each other
16100 /// in memory. This means that there is no hole between the bits loaded
16101 /// by \p First and the bits loaded by \p Second.
16102 static bool areSlicesNextToEachOther(const LoadedSlice &First,
16103                                      const LoadedSlice &Second) {
16104   assert(First.Origin == Second.Origin && First.Origin &&
16105          "Unable to match different memory origins.");
16106   APInt UsedBits = First.getUsedBits();
16107   assert((UsedBits & Second.getUsedBits()) == 0 &&
16108          "Slices are not supposed to overlap.");
16109   UsedBits |= Second.getUsedBits();
16110   return areUsedBitsDense(UsedBits);
16111 }
16112 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are slices in \p LoadedSlices.
16117 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16118                                  LoadedSlice::Cost &GlobalLSCost) {
16119   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
16121   if (NumberOfSlices < 2)
16122     return;
16123 
16124   // Sort the slices so that elements that are likely to be next to each
16125   // other in memory are next to each other in the list.
16126   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
16127     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
16128     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
16129   });
16130   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
16133   const LoadedSlice *First = nullptr;
16134   const LoadedSlice *Second = nullptr;
16135   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
16136                 // Set the beginning of the pair.
16137                                                            First = Second) {
16138     Second = &LoadedSlices[CurrSlice];
16139 
16140     // If First is NULL, it means we start a new pair.
16141     // Get to the next slice.
16142     if (!First)
16143       continue;
16144 
16145     EVT LoadedType = First->getLoadedType();
16146 
16147     // If the types of the slices are different, we cannot pair them.
16148     if (LoadedType != Second->getLoadedType())
16149       continue;
16150 
16151     // Check if the target supplies paired loads for this type.
16152     Align RequiredAlignment;
16153     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
16155       Second = nullptr;
16156       continue;
16157     }
16158     // Check if we meet the alignment requirement.
16159     if (First->getAlign() < RequiredAlignment)
16160       continue;
16161 
16162     // Check that both loads are next to each other in memory.
16163     if (!areSlicesNextToEachOther(*First, *Second))
16164       continue;
16165 
16166     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
16167     --GlobalLSCost.Loads;
16168     // Move to the next pair.
16169     Second = nullptr;
16170   }
16171 }
16172 
16173 /// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
16175 /// involved slices (1) which are (2) next to each other in memory, and
16176 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
16177 ///
16178 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
16179 /// the elements themselves.
16180 ///
16181 /// FIXME: When the cost model will be mature enough, we can relax
16182 /// constraints (1) and (2).
16183 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
16184                                 const APInt &UsedBits, bool ForCodeSize) {
16185   unsigned NumberOfSlices = LoadedSlices.size();
16186   if (StressLoadSlicing)
16187     return NumberOfSlices > 1;
16188 
16189   // Check (1).
16190   if (NumberOfSlices != 2)
16191     return false;
16192 
16193   // Check (2).
16194   if (!areUsedBitsDense(UsedBits))
16195     return false;
16196 
16197   // Check (3).
16198   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
16199   // The original code has one big load.
16200   OrigCost.Loads = 1;
16201   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
16202     const LoadedSlice &LS = LoadedSlices[CurrSlice];
16203     // Accumulate the cost of all the slices.
16204     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
16205     GlobalSlicingCost += SliceCost;
16206 
16207     // Account as cost in the original configuration the gain obtained
16208     // with the current slices.
16209     OrigCost.addSliceGain(LS);
16210   }
16211 
16212   // If the target supports paired load, adjust the cost accordingly.
16213   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
16214   return OrigCost > GlobalSlicingCost;
16215 }
16216 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
16219 ///
16220 /// This sort of thing is introduced by SROA.
16221 /// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
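/// E.g. (an illustrative sketch; the resulting offsets depend on endianness):
///   x = load i32
///   a = trunc x to i8
///   b = trunc (srl x, 16) to i8
/// may be rewritten as two independent i8 loads of the corresponding bytes.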
16223 bool DAGCombiner::SliceUpLoad(SDNode *N) {
16224   if (Level < AfterLegalizeDAG)
16225     return false;
16226 
16227   LoadSDNode *LD = cast<LoadSDNode>(N);
16228   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
16229       !LD->getValueType(0).isInteger())
16230     return false;
16231 
16232   // The algorithm to split up a load of a scalable vector into individual
16233   // elements currently requires knowing the length of the loaded type,
16234   // so will need adjusting to work on scalable vectors.
16235   if (LD->getValueType(0).isScalableVector())
16236     return false;
16237 
16238   // Keep track of already used bits to detect overlapping values.
16239   // In that case, we will just abort the transformation.
16240   APInt UsedBits(LD->getValueSizeInBits(0), 0);
16241 
16242   SmallVector<LoadedSlice, 4> LoadedSlices;
16243 
16244   // Check if this load is used as several smaller chunks of bits.
16245   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
16246   // of computation for each trunc.
16247   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
16248        UI != UIEnd; ++UI) {
16249     // Skip the uses of the chain.
16250     if (UI.getUse().getResNo() != 0)
16251       continue;
16252 
16253     SDNode *User = *UI;
16254     unsigned Shift = 0;
16255 
16256     // Check if this is a trunc(lshr).
16257     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
16258         isa<ConstantSDNode>(User->getOperand(1))) {
16259       Shift = User->getConstantOperandVal(1);
16260       User = *User->use_begin();
16261     }
16262 
    // At this point, User is a truncate iff we encountered trunc or
    // trunc(lshr).
16265     if (User->getOpcode() != ISD::TRUNCATE)
16266       return false;
16267 
    // The width of the type must be a power of 2 and at least 8 bits.
16269     // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
    // the slice will span several bytes. We do not support that.
16272     unsigned Width = User->getValueSizeInBits(0);
16273     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
16274       return false;
16275 
16276     // Build the slice for this chain of computations.
16277     LoadedSlice LS(User, LD, Shift, &DAG);
16278     APInt CurrentUsedBits = LS.getUsedBits();
16279 
16280     // Check if this slice overlaps with another.
16281     if ((CurrentUsedBits & UsedBits) != 0)
16282       return false;
16283     // Update the bits used globally.
16284     UsedBits |= CurrentUsedBits;
16285 
16286     // Check if the new slice would be legal.
16287     if (!LS.isLegal())
16288       return false;
16289 
16290     // Record the slice.
16291     LoadedSlices.push_back(LS);
16292   }
16293 
16294   // Abort slicing if it does not seem to be profitable.
16295   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
16296     return false;
16297 
16298   ++SlicedLoads;
16299 
16300   // Rewrite each chain to use an independent load.
16301   // By construction, each chain can be represented by a unique load.
16302 
16303   // Prepare the argument for the new token factor for all the slices.
16304   SmallVector<SDValue, 8> ArgChains;
16305   for (const LoadedSlice &LS : LoadedSlices) {
16306     SDValue SliceInst = LS.loadSlice();
16307     CombineTo(LS.Inst, SliceInst, true);
16308     if (SliceInst.getOpcode() != ISD::LOAD)
16309       SliceInst = SliceInst.getOperand(0);
16310     assert(SliceInst->getOpcode() == ISD::LOAD &&
16311            "It takes more than a zext to get to the loaded slice!!");
16312     ArgChains.push_back(SliceInst.getValue(1));
16313   }
16314 
16315   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
16316                               ArgChains);
16317   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
16318   AddToWorklist(Chain.getNode());
16319   return true;
16320 }
16321 
/// Check to see if V is (and (load ptr), imm), where the load has
/// specific bytes cleared out.  If so, return the byte size being masked out
16324 /// and the shift amount.
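/// E.g. (illustrative), for (and (load i32 ptr), 0xFFFF00FF) the second least
/// significant byte is being cleared, so this returns {1, 1}: one byte masked
/// out, at a shift of one byte.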
16325 static std::pair<unsigned, unsigned>
16326 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
16327   std::pair<unsigned, unsigned> Result(0, 0);
16328 
16329   // Check for the structure we're looking for.
16330   if (V->getOpcode() != ISD::AND ||
16331       !isa<ConstantSDNode>(V->getOperand(1)) ||
16332       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16333     return Result;
16334 
16335   // Check the chain and pointer.
16336   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16337   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16338 
16339   // This only handles simple types.
16340   if (V.getValueType() != MVT::i16 &&
16341       V.getValueType() != MVT::i32 &&
16342       V.getValueType() != MVT::i64)
16343     return Result;
16344 
16345   // Check the constant mask.  Invert it so that the bits being masked out are
16346   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16347   // follow the sign bit for uniformity.
16348   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16349   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16350   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16351   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16352   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16353   if (NotMaskLZ == 64) return Result;  // All zero mask.
16354 
16355   // See if we have a continuous run of bits.  If so, we have 0*1+0*
16356   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16357     return Result;
16358 
16359   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16360   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16361     NotMaskLZ -= 64-V.getValueSizeInBits();
16362 
16363   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16364   switch (MaskedBytes) {
16365   case 1:
16366   case 2:
16367   case 4: break;
16368   default: return Result; // All one mask, or 5-byte mask.
16369   }
16370 
16371   // Verify that the first bit starts at a multiple of mask so that the access
16372   // is aligned the same as the access width.
16373   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16374 
  // For narrowing to be valid, it must be the case that the load is the
  // immediately preceding memory operation before the store.
16377   if (LD == Chain.getNode())
16378     ; // ok.
16379   else if (Chain->getOpcode() == ISD::TokenFactor &&
16380            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
16382     if (!LD->isOperandOf(Chain.getNode()))
16383       return Result;
16384   } else
16385     return Result; // Fail.
16386 
16387   Result.first = MaskedBytes;
16388   Result.second = NotMaskTZ/8;
16389   return Result;
16390 }
16391 
16392 /// Check to see if IVal is something that provides a value as specified by
16393 /// MaskInfo. If so, replace the specified store with a narrower store of
16394 /// truncated IVal.
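/// E.g. (illustrative), with MaskInfo == {1, 1} on a little-endian target, an
/// i32 store can become an i8 store of (trunc (srl IVal, 8)) at Ptr + 1.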
16395 static SDValue
16396 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16397                                 SDValue IVal, StoreSDNode *St,
16398                                 DAGCombiner *DC) {
16399   unsigned NumBytes = MaskInfo.first;
16400   unsigned ByteShift = MaskInfo.second;
16401   SelectionDAG &DAG = DC->getDAG();
16402 
16403   // Check to see if IVal is all zeros in the part being masked in by the 'or'
16404   // that uses this.  If not, this is not a replacement.
16405   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16406                                   ByteShift*8, (ByteShift+NumBytes)*8);
16407   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16408 
16409   // Check that it is legal on the target to do this.  It is legal if the new
16410   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16411   // legalization (and the target doesn't explicitly think this is a bad idea).
16412   MVT VT = MVT::getIntegerVT(NumBytes * 8);
16413   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16414   if (!DC->isTypeLegal(VT))
16415     return SDValue();
16416   if (St->getMemOperand() &&
16417       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16418                               *St->getMemOperand()))
16419     return SDValue();
16420 
16421   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
16422   // shifted by ByteShift and truncated down to NumBytes.
16423   if (ByteShift) {
16424     SDLoc DL(IVal);
16425     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16426                        DAG.getConstant(ByteShift*8, DL,
16427                                     DC->getShiftAmountTy(IVal.getValueType())));
16428   }
16429 
16430   // Figure out the offset for the store and the alignment of the access.
16431   unsigned StOffset;
16432   if (DAG.getDataLayout().isLittleEndian())
16433     StOffset = ByteShift;
16434   else
16435     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16436 
16437   SDValue Ptr = St->getBasePtr();
16438   if (StOffset) {
16439     SDLoc DL(IVal);
16440     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16441   }
16442 
16443   // Truncate down to the new size.
16444   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16445 
16446   ++OpsNarrowed;
16447   return DAG
16448       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16449                 St->getPointerInfo().getWithOffset(StOffset),
16450                 St->getOriginalAlign());
16451 }
16452 
16453 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16454 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16455 /// narrowing the load and store if it would end up being a win for performance
16456 /// or code size.
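/// E.g. (illustrative), "store (or (load p), 0x00FF0000), p" may be narrowed
/// to an i8 store of 0xFF at the address of that byte (p + 2 on little-endian
/// targets).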
16457 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16458   StoreSDNode *ST  = cast<StoreSDNode>(N);
16459   if (!ST->isSimple())
16460     return SDValue();
16461 
16462   SDValue Chain = ST->getChain();
16463   SDValue Value = ST->getValue();
16464   SDValue Ptr   = ST->getBasePtr();
16465   EVT VT = Value.getValueType();
16466 
16467   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16468     return SDValue();
16469 
16470   unsigned Opc = Value.getOpcode();
16471 
16472   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16473   // is a byte mask indicating a consecutive number of bytes, check to see if
16474   // Y is known to provide just those bytes.  If so, we try to replace the
16475   // load + replace + store sequence with a single (narrower) store, which makes
16476   // the load dead.
16477   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16478     std::pair<unsigned, unsigned> MaskedLoad;
16479     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16480     if (MaskedLoad.first)
16481       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16482                                                   Value.getOperand(1), ST,this))
16483         return NewST;
16484 
16485     // Or is commutative, so try swapping X and Y.
16486     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16487     if (MaskedLoad.first)
16488       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16489                                                   Value.getOperand(0), ST,this))
16490         return NewST;
16491   }
16492 
16493   if (!EnableReduceLoadOpStoreWidth)
16494     return SDValue();
16495 
16496   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16497       Value.getOperand(1).getOpcode() != ISD::Constant)
16498     return SDValue();
16499 
16500   SDValue N0 = Value.getOperand(0);
16501   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16502       Chain == SDValue(N0.getNode(), 1)) {
16503     LoadSDNode *LD = cast<LoadSDNode>(N0);
16504     if (LD->getBasePtr() != Ptr ||
16505         LD->getPointerInfo().getAddrSpace() !=
16506         ST->getPointerInfo().getAddrSpace())
16507       return SDValue();
16508 
    // Find the type to narrow the load / op / store to.
16510     SDValue N1 = Value.getOperand(1);
16511     unsigned BitWidth = N1.getValueSizeInBits();
16512     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16513     if (Opc == ISD::AND)
16514       Imm ^= APInt::getAllOnesValue(BitWidth);
16515     if (Imm == 0 || Imm.isAllOnesValue())
16516       return SDValue();
16517     unsigned ShAmt = Imm.countTrailingZeros();
16518     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16519     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16520     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16521     // The narrowing should be profitable, the load/store operation should be
16522     // legal (or custom) and the store size should be equal to the NewVT width.
16523     while (NewBW < BitWidth &&
16524            (NewVT.getStoreSizeInBits() != NewBW ||
16525             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16526             !TLI.isNarrowingProfitable(VT, NewVT))) {
16527       NewBW = NextPowerOf2(NewBW);
16528       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16529     }
16530     if (NewBW >= BitWidth)
16531       return SDValue();
16532 
    // If the changed bits do not start at a NewBW-bit boundary,
    // round down to the previous boundary.
16535     if (ShAmt % NewBW)
16536       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16537     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16538                                    std::min(BitWidth, ShAmt + NewBW));
16539     if ((Imm & Mask) == Imm) {
16540       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16541       if (Opc == ISD::AND)
16542         NewImm ^= APInt::getAllOnesValue(NewBW);
16543       uint64_t PtrOff = ShAmt / 8;
16544       // For big endian targets, we need to adjust the offset to the pointer to
16545       // load the correct bytes.
16546       if (DAG.getDataLayout().isBigEndian())
16547         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16548 
16549       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16550       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16551       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16552         return SDValue();
16553 
16554       SDValue NewPtr =
16555           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16556       SDValue NewLD =
16557           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16558                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16559                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
16560       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16561                                    DAG.getConstant(NewImm, SDLoc(Value),
16562                                                    NewVT));
16563       SDValue NewST =
16564           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16565                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16566 
16567       AddToWorklist(NewPtr.getNode());
16568       AddToWorklist(NewLD.getNode());
16569       AddToWorklist(NewVal.getNode());
16570       WorklistRemover DeadNodes(*this);
16571       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16572       ++OpsNarrowed;
16573       return NewST;
16574     }
16575   }
16576 
16577   return SDValue();
16578 }
16579 
16580 /// For a given floating point load / store pair, if the load value isn't used
16581 /// by any other operations, then consider transforming the pair to integer
16582 /// load / store operations if the target deems the transformation profitable.
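/// E.g. (illustrative), an f32 load whose only use is an f32 store may become
/// an i32 load/store pair, avoiding a round trip through the FP register
/// file.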
16583 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16584   StoreSDNode *ST  = cast<StoreSDNode>(N);
16585   SDValue Value = ST->getValue();
16586   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16587       Value.hasOneUse()) {
16588     LoadSDNode *LD = cast<LoadSDNode>(Value);
16589     EVT VT = LD->getMemoryVT();
16590     if (!VT.isFloatingPoint() ||
16591         VT != ST->getMemoryVT() ||
16592         LD->isNonTemporal() ||
16593         ST->isNonTemporal() ||
16594         LD->getPointerInfo().getAddrSpace() != 0 ||
16595         ST->getPointerInfo().getAddrSpace() != 0)
16596       return SDValue();
16597 
16598     TypeSize VTSize = VT.getSizeInBits();
16599 
16600     // We don't know the size of scalable types at compile time so we cannot
16601     // create an integer of the equivalent size.
16602     if (VTSize.isScalable())
16603       return SDValue();
16604 
16605     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16606     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16607         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16608         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16609         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16610       return SDValue();
16611 
16612     Align LDAlign = LD->getAlign();
16613     Align STAlign = ST->getAlign();
16614     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16615     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16616     if (LDAlign < ABIAlign || STAlign < ABIAlign)
16617       return SDValue();
16618 
16619     SDValue NewLD =
16620         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16621                     LD->getPointerInfo(), LDAlign);
16622 
16623     SDValue NewST =
16624         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16625                      ST->getPointerInfo(), STAlign);
16626 
16627     AddToWorklist(NewLD.getNode());
16628     AddToWorklist(NewST.getNode());
16629     WorklistRemover DeadNodes(*this);
16630     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16631     ++LdStFP2Int;
16632     return NewST;
16633   }
16634 
16635   return SDValue();
16636 }
16637 
16638 // This is a helper function for visitMUL to check the profitability
16639 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16640 // MulNode is the original multiply, AddNode is (add x, c1),
16641 // and ConstNode is c2.
16642 //
16643 // If the (add x, c1) has multiple uses, we could increase
16644 // the number of adds if we make this transformation.
16645 // It would only be worth doing this if we can remove a
16646 // multiply in the process. Check for that here.
16647 // To illustrate:
16648 //     (A + c1) * c3
16649 //     (A + c2) * c3
16650 // We're checking for cases where we have common "c3 * A" expressions.
16651 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16652                                               SDValue &AddNode,
16653                                               SDValue &ConstNode) {
16654   APInt Val;
16655 
16656   // If the add only has one use, this would be OK to do.
16657   if (AddNode.getNode()->hasOneUse())
16658     return true;
16659 
16660   // Walk all the users of the constant with which we're multiplying.
16661   for (SDNode *Use : ConstNode->uses()) {
16662     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16663       continue;
16664 
16665     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16666       SDNode *OtherOp;
16667       SDNode *MulVar = AddNode.getOperand(0).getNode();
16668 
16669       // OtherOp is what we're multiplying against the constant.
16670       if (Use->getOperand(0) == ConstNode)
16671         OtherOp = Use->getOperand(1).getNode();
16672       else
16673         OtherOp = Use->getOperand(0).getNode();
16674 
16675       // Check to see if multiply is with the same operand of our "add".
16676       //
16677       //     ConstNode  = CONST
16678       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16679       //     ...
16680       //     AddNode  = (A + c1)  <-- MulVar is A.
16681       //         = AddNode * ConstNode   <-- current visiting instruction.
16682       //
16683       // If we make this transformation, we will have a common
16684       // multiply (ConstNode * A) that we can save.
16685       if (OtherOp == MulVar)
16686         return true;
16687 
16688       // Now check to see if a future expansion will give us a common
16689       // multiply.
16690       //
16691       //     ConstNode  = CONST
16692       //     AddNode    = (A + c1)
16693       //     ...   = AddNode * ConstNode <-- current visiting instruction.
16694       //     ...
16695       //     OtherOp = (A + c2)
16696       //     Use     = OtherOp * ConstNode <-- visiting Use.
16697       //
16698       // If we make this transformation, we will have a common
16699       // multiply (CONST * A) after we also do the same transformation
16700       // to the "t2" instruction.
16701       if (OtherOp->getOpcode() == ISD::ADD &&
16702           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16703           OtherOp->getOperand(0).getNode() == MulVar)
16704         return true;
16705     }
16706   }
16707 
16708   // Didn't find a case where this would be profitable.
16709   return false;
16710 }
16711 
16712 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16713                                          unsigned NumStores) {
16714   SmallVector<SDValue, 8> Chains;
16715   SmallPtrSet<const SDNode *, 8> Visited;
16716   SDLoc StoreDL(StoreNodes[0].MemNode);
16717 
16718   for (unsigned i = 0; i < NumStores; ++i) {
16719     Visited.insert(StoreNodes[i].MemNode);
16720   }
16721 
  // Don't include chains that are other candidate stores (children) or that
  // we have already added.
16723   for (unsigned i = 0; i < NumStores; ++i) {
16724     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16725       Chains.push_back(StoreNodes[i].MemNode->getChain());
16726   }
16727 
  assert(!Chains.empty() && "Candidate stores must have at least one chain");
16729   return DAG.getTokenFactor(StoreDL, Chains);
16730 }
16731 
16732 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16733     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16734     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16735   // Make sure we have something to merge.
16736   if (NumStores < 2)
16737     return false;
16738 
  // Use the location of the first store for the merged operations.
16740   SDLoc DL(StoreNodes[0].MemNode);
16741 
16742   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16743   unsigned SizeInBits = NumStores * ElementSizeBits;
16744   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16745 
16746   EVT StoreTy;
16747   if (UseVector) {
16748     unsigned Elts = NumStores * NumMemElts;
16749     // Get the type for the merged vector store.
16750     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16751   } else
16752     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16753 
16754   SDValue StoredVal;
16755   if (UseVector) {
16756     if (IsConstantSrc) {
16757       SmallVector<SDValue, 8> BuildVector;
16758       for (unsigned I = 0; I != NumStores; ++I) {
16759         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16760         SDValue Val = St->getValue();
16761         // If constant is of the wrong type, convert it now.
16762         if (MemVT != Val.getValueType()) {
16763           Val = peekThroughBitcasts(Val);
16764           // Deal with constants of wrong size.
16765           if (ElementSizeBits != Val.getValueSizeInBits()) {
16766             EVT IntMemVT =
16767                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16768             if (isa<ConstantFPSDNode>(Val)) {
16769               // Not clear how to truncate FP values.
16770               return false;
16771             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16772               Val = DAG.getConstant(C->getAPIntValue()
16773                                         .zextOrTrunc(Val.getValueSizeInBits())
16774                                         .zextOrTrunc(ElementSizeBits),
16775                                     SDLoc(C), IntMemVT);
16776           }
          // Bitcast the correctly-sized value to the store's memory type.
16778           Val = DAG.getBitcast(MemVT, Val);
16779         }
16780         BuildVector.push_back(Val);
16781       }
16782       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16783                                                : ISD::BUILD_VECTOR,
16784                               DL, StoreTy, BuildVector);
16785     } else {
16786       SmallVector<SDValue, 8> Ops;
16787       for (unsigned i = 0; i < NumStores; ++i) {
16788         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16789         SDValue Val = peekThroughBitcasts(St->getValue());
        // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
16791         // type MemVT. If the underlying value is not the correct
16792         // type, but it is an extraction of an appropriate vector we
16793         // can recast Val to be of the correct type. This may require
16794         // converting between EXTRACT_VECTOR_ELT and
16795         // EXTRACT_SUBVECTOR.
16796         if ((MemVT != Val.getValueType()) &&
16797             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
16798              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16799           EVT MemVTScalarTy = MemVT.getScalarType();
16800           // We may need to add a bitcast here to get types to line up.
16801           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16802             Val = DAG.getBitcast(MemVT, Val);
16803           } else {
16804             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16805                                             : ISD::EXTRACT_VECTOR_ELT;
16806             SDValue Vec = Val.getOperand(0);
16807             SDValue Idx = Val.getOperand(1);
16808             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16809           }
16810         }
16811         Ops.push_back(Val);
16812       }
16813 
16814       // Build the extracted vector elements back into a vector.
16815       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16816                                                : ISD::BUILD_VECTOR,
16817                               DL, StoreTy, Ops);
16818     }
16819   } else {
16820     // We should always use a vector store when merging extracted vector
16821     // elements, so this path implies a store of constants.
16822     assert(IsConstantSrc && "Merged vector elements should use vector store");
16823 
16824     APInt StoreInt(SizeInBits, 0);
16825 
16826     // Construct a single integer constant which is made of the smaller
16827     // constant inputs.
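    // For example (illustrative): merging i8 stores of 0x12 (at p+0) and 0x34
    // (at p+1) on a little-endian target walks the stores in reverse, so the
    // lowest-address value lands in the low bits and StoreInt becomes 0x3412,
    // stored as a single i16.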
16828     bool IsLE = DAG.getDataLayout().isLittleEndian();
16829     for (unsigned i = 0; i < NumStores; ++i) {
16830       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16831       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16832 
16833       SDValue Val = St->getValue();
16834       Val = peekThroughBitcasts(Val);
16835       StoreInt <<= ElementSizeBits;
16836       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16837         StoreInt |= C->getAPIntValue()
16838                         .zextOrTrunc(ElementSizeBits)
16839                         .zextOrTrunc(SizeInBits);
16840       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16841         StoreInt |= C->getValueAPF()
16842                         .bitcastToAPInt()
16843                         .zextOrTrunc(ElementSizeBits)
16844                         .zextOrTrunc(SizeInBits);
        // If FP truncation is necessary, give up for now.
16846         if (MemVT.getSizeInBits() != ElementSizeBits)
16847           return false;
16848       } else {
16849         llvm_unreachable("Invalid constant element type");
16850       }
16851     }
16852 
    // Create the single merged integer constant to store.
16854     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16855   }
16856 
16857   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16858   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16859 
  // Make sure we use a truncating store when that is necessary for legality.
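  // For example (illustrative): on a target where i24 is promoted to i32,
  // three merged i8 constant stores become an i32 constant written out with a
  // truncating store of memory type i24.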
16861   SDValue NewStore;
16862   if (!UseTrunc) {
16863     NewStore =
16864         DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16865                      FirstInChain->getPointerInfo(), FirstInChain->getAlign());
16866   } else { // Must be realized as a trunc store
16867     EVT LegalizedStoredValTy =
16868         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16869     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16870     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16871     SDValue ExtendedStoreVal =
16872         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16873                         LegalizedStoredValTy);
16874     NewStore = DAG.getTruncStore(
16875         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16876         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16877         FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
16878   }
16879 
16880   // Replace all merged stores with the new store.
16881   for (unsigned i = 0; i < NumStores; ++i)
16882     CombineTo(StoreNodes[i].MemNode, NewStore);
16883 
16884   AddToWorklist(NewChain.getNode());
16885   return true;
16886 }
16887 
16888 void DAGCombiner::getStoreMergeCandidates(
16889     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16890     SDNode *&RootNode) {
16891   // This holds the base pointer, index, and the offset in bytes from the base
16892   // pointer. We must have a base and an offset. Do not handle stores to undef
16893   // base pointers.
16894   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16895   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
16896     return;
16897 
16898   SDValue Val = peekThroughBitcasts(St->getValue());
16899   StoreSource StoreSrc = getStoreSource(Val);
16900   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16901 
  // If the stored value is a load, we also match on the load's base pointer.
16903   EVT MemVT = St->getMemoryVT();
16904   BaseIndexOffset LBasePtr;
16905   EVT LoadVT;
16906   if (StoreSrc == StoreSource::Load) {
16907     auto *Ld = cast<LoadSDNode>(Val);
16908     LBasePtr = BaseIndexOffset::match(Ld, DAG);
16909     LoadVT = Ld->getMemoryVT();
16910     // Load and store should be the same type.
16911     if (MemVT != LoadVT)
16912       return;
16913     // Loads must only have one use.
16914     if (!Ld->hasNUsesOfValue(1, 0))
16915       return;
16916     // The memory operands must not be volatile/indexed/atomic.
16917     // TODO: May be able to relax for unordered atomics (see D66309)
16918     if (!Ld->isSimple() || Ld->isIndexed())
16919       return;
16920   }
16921   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16922                             int64_t &Offset) -> bool {
16923     // The memory operands must not be volatile/indexed/atomic.
16924     // TODO: May be able to relax for unordered atomics (see D66309)
16925     if (!Other->isSimple() || Other->isIndexed())
16926       return false;
16927     // Don't mix temporal stores with non-temporal stores.
16928     if (St->isNonTemporal() != Other->isNonTemporal())
16929       return false;
16930     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16931     // Allow merging constants of different types as integers.
16932     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16933                                            : Other->getMemoryVT() != MemVT;
16934     switch (StoreSrc) {
16935     case StoreSource::Load: {
16936       if (NoTypeMatch)
16937         return false;
16938       // The Load's Base Ptr must also match.
16939       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
16940       if (!OtherLd)
16941         return false;
16942       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16943       if (LoadVT != OtherLd->getMemoryVT())
16944         return false;
16945       // Loads must only have one use.
16946       if (!OtherLd->hasNUsesOfValue(1, 0))
16947         return false;
16948       // The memory operands must not be volatile/indexed/atomic.
16949       // TODO: May be able to relax for unordered atomics (see D66309)
16950       if (!OtherLd->isSimple() || OtherLd->isIndexed())
16951         return false;
16952       // Don't mix temporal loads with non-temporal loads.
16953       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16954         return false;
16955       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16956         return false;
16957       break;
16958     }
16959     case StoreSource::Constant:
16960       if (NoTypeMatch)
16961         return false;
16962       if (!isIntOrFPConstant(OtherBC))
16963         return false;
16964       break;
16965     case StoreSource::Extract:
16966       // Do not merge truncated stores here.
16967       if (Other->isTruncatingStore())
16968         return false;
16969       if (!MemVT.bitsEq(OtherBC.getValueType()))
16970         return false;
16971       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16972           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16973         return false;
16974       break;
16975     default:
16976       llvm_unreachable("Unhandled store source for merging");
16977     }
16978     Ptr = BaseIndexOffset::match(Other, DAG);
16979     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16980   };
16981 
  // Check whether the (StoreNode, RootNode) pair has already bailed out of
  // the dependence check more times than the limit allows.
16984   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16985                                         SDNode *RootNode) -> bool {
16986     auto RootCount = StoreRootCountMap.find(StoreNode);
16987     return RootCount != StoreRootCountMap.end() &&
16988            RootCount->second.first == RootNode &&
16989            RootCount->second.second > StoreMergeDependenceLimit;
16990   };
16991 
16992   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
16993     // This must be a chain use.
16994     if (UseIter.getOperandNo() != 0)
16995       return;
16996     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
16997       BaseIndexOffset Ptr;
16998       int64_t PtrDiff;
16999       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
17000           !OverLimitInDependenceCheck(OtherStore, RootNode))
17001         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
17002     }
17003   };
17004 
  // We are looking for a root node that is an ancestor to all mergeable
  // stores. We search up through a load, to our root, and then down
  // through all children. For instance, we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 and the root is not a load, which is
  // always true for non-volatile ops. TODO: Expand the search to find all
  // valid candidates through multiple layers of loads.
17011   //
17012   // Root
17013   // |-------|-------|
17014   // Load    Load    Store3
17015   // |       |
17016   // Store1   Store2
17017   //
17018   // FIXME: We should be able to climb and
17019   // descend TokenFactors to find candidates as well.
17020 
17021   RootNode = St->getChain().getNode();
17022 
17023   unsigned NumNodesExplored = 0;
17024   const unsigned MaxSearchNodes = 1024;
17025   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
17026     RootNode = Ldn->getChain().getNode();
17027     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17028          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
17029       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
17030         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
17031           TryToAddCandidate(I2);
17032       }
17033     }
17034   } else {
17035     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
17036          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
17037       TryToAddCandidate(I);
17038   }
17039 }
17040 
17041 // We need to check that merging these stores does not cause a loop in
17042 // the DAG. Any store candidate may depend on another candidate
17043 // indirectly through its operand (we already consider dependencies
17044 // through the chain). Check in parallel by searching up from
17045 // non-chain operands of candidates.
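// For example (illustrative): if one candidate's stored value is computed
// from a load whose chain depends on another candidate, merging them into a
// single store would make that store a predecessor of itself.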
17046 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
17047     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
17048     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a way similar to
  // TokenFactor simplification.
17053 
17054   SmallPtrSet<const SDNode *, 32> Visited;
17055   SmallVector<const SDNode *, 8> Worklist;
17056 
  // RootNode is a predecessor to all candidates, so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards the size check.
17060 
17061   Worklist.push_back(RootNode);
17062   while (!Worklist.empty()) {
17063     auto N = Worklist.pop_back_val();
17064     if (!Visited.insert(N).second)
17065       continue; // Already present in Visited.
17066     if (N->getOpcode() == ISD::TokenFactor) {
17067       for (SDValue Op : N->ops())
17068         Worklist.push_back(Op.getNode());
17069     }
17070   }
17071 
17072   // Don't count pruning nodes towards max.
17073   unsigned int Max = 1024 + Visited.size();
17074   // Search Ops of store candidates.
17075   for (unsigned i = 0; i < NumStores; ++i) {
17076     SDNode *N = StoreNodes[i].MemNode;
17077     // Of the 4 Store Operands:
17078     //   * Chain (Op 0) -> We have already considered these
17079     //                    in candidate selection and can be
17080     //                    safely ignored
17081     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via an indexed store).
    //   * (Op 3) -> Represents the pre- or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
17088     for (unsigned j = 1; j < N->getNumOperands(); ++j)
17089       Worklist.push_back(N->getOperand(j).getNode());
17090   }
17091   // Search through DAG. We can stop early if we find a store node.
17092   for (unsigned i = 0; i < NumStores; ++i)
17093     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
17094                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in
      // StoreRootCountMap. Once we have seen the pair more times than the
      // limit allows, we won't add the StoreNode to the StoreNodes set again.
17098       if (Visited.size() >= Max) {
17099         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
17100         if (RootCount.first == RootNode)
17101           RootCount.second++;
17102         else
17103           RootCount = {RootNode, 1};
17104       }
17105       return false;
17106     }
17107   return true;
17108 }
17109 
17110 unsigned
17111 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
17112                                   int64_t ElementSizeBytes) const {
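  // For example (illustrative): with 4-byte elements and sorted offsets
  // {0, 4, 8, 16}, the first three stores are consecutive, so we return 3 and
  // leave the store at offset 16 for a later iteration.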
17113   while (true) {
    // Skip ahead to the first store that is immediately followed by another
    // store at the adjacent address.
17115     size_t StartIdx = 0;
17116     while ((StartIdx + 1 < StoreNodes.size()) &&
17117            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
17118               StoreNodes[StartIdx + 1].OffsetFromBase)
17119       ++StartIdx;
17120 
17121     // Bail if we don't have enough candidates to merge.
17122     if (StartIdx + 1 >= StoreNodes.size())
17123       return 0;
17124 
    // Trim the leading stores skipped above; they overlap the next store or
    // leave a gap.
17126     if (StartIdx)
17127       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
17128 
17129     // Scan the memory operations on the chain and find the first
17130     // non-consecutive store memory address.
17131     unsigned NumConsecutiveStores = 1;
17132     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17133     // Check that the addresses are consecutive starting from the second
17134     // element in the list of stores.
17135     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
17136       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
17137       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17138         break;
17139       NumConsecutiveStores = i + 1;
17140     }
17141     if (NumConsecutiveStores > 1)
17142       return NumConsecutiveStores;
17143 
17144     // There are no consecutive stores at the start of the list.
17145     // Remove the first store and try again.
17146     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
17147   }
17148 }
17149 
17150 bool DAGCombiner::tryStoreMergeOfConstants(
17151     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17152     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
17153   LLVMContext &Context = *DAG.getContext();
17154   const DataLayout &DL = DAG.getDataLayout();
17155   int64_t ElementSizeBytes = MemVT.getStoreSize();
17156   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17157   bool MadeChange = false;
17158 
17159   // Store the constants into memory as one consecutive store.
17160   while (NumConsecutiveStores >= 2) {
17161     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17162     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17163     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17164     unsigned LastLegalType = 1;
17165     unsigned LastLegalVectorType = 1;
17166     bool LastIntegerTrunc = false;
17167     bool NonZero = false;
17168     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
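    // FirstZeroAfterNonZero limits how far we skip below: e.g., for constants
    // {1, 2, 0, 0} (illustrative) we must not skip past index 2, since the
    // remaining zeros might still merge into a cheap all-zero vector store.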
17169     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17170       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
17171       SDValue StoredVal = ST->getValue();
17172       bool IsElementZero = false;
17173       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
17174         IsElementZero = C->isNullValue();
17175       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
17176         IsElementZero = C->getConstantFPValue()->isNullValue();
17177       if (IsElementZero) {
17178         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
17179           FirstZeroAfterNonZero = i;
17180       }
17181       NonZero |= !IsElementZero;
17182 
17183       // Find a legal type for the constant store.
17184       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17185       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17186       bool IsFast = false;
17187 
17188       // Break early when size is too large to be legal.
17189       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17190         break;
17191 
17192       if (TLI.isTypeLegal(StoreTy) &&
17193           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17194           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17195                                  *FirstInChain->getMemOperand(), &IsFast) &&
17196           IsFast) {
17197         LastIntegerTrunc = false;
17198         LastLegalType = i + 1;
17199         // Or check whether a truncstore is legal.
17200       } else if (TLI.getTypeAction(Context, StoreTy) ==
17201                  TargetLowering::TypePromoteInteger) {
17202         EVT LegalizedStoredValTy =
17203             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
17204         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17205             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17206             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17207                                    *FirstInChain->getMemOperand(), &IsFast) &&
17208             IsFast) {
17209           LastIntegerTrunc = true;
17210           LastLegalType = i + 1;
17211         }
17212       }
17213 
      // We only use vectors if every constant is known to be zero or the
      // target says vector constant stores are cheap, and vectors are
      // allowed (the function is not marked noimplicitfloat).
17217       if ((!NonZero ||
17218            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
17219           AllowVectors) {
17220         // Find a legal type for the vector store.
17221         unsigned Elts = (i + 1) * NumMemElts;
17222         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17223         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
17224             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17225             TLI.allowsMemoryAccess(Context, DL, Ty,
17226                                    *FirstInChain->getMemOperand(), &IsFast) &&
17227             IsFast)
17228           LastLegalVectorType = i + 1;
17229       }
17230     }
17231 
17232     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
17233     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
17234 
17235     // Check if we found a legal integer type that creates a meaningful
17236     // merge.
17237     if (NumElem < 2) {
17238       // We know that candidate stores are in order and of correct
17239       // shape. While there is no mergeable sequence from the
17240       // beginning one may start later in the sequence. The only
17241       // reason a merge of size N could have failed where another of
17242       // the same size would not have, is if the alignment has
17243       // improved or we've dropped a non-zero value. Drop as many
17244       // candidates as we can here.
17245       unsigned NumSkip = 1;
17246       while ((NumSkip < NumConsecutiveStores) &&
17247              (NumSkip < FirstZeroAfterNonZero) &&
17248              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17249         NumSkip++;
17250 
17251       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17252       NumConsecutiveStores -= NumSkip;
17253       continue;
17254     }
17255 
17256     // Check that we can merge these candidates without causing a cycle.
17257     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17258                                                   RootNode)) {
17259       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17260       NumConsecutiveStores -= NumElem;
17261       continue;
17262     }
17263 
17264     MadeChange |= mergeStoresOfConstantsOrVecElts(
17265         StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
17266 
17267     // Remove merged stores for next iteration.
17268     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17269     NumConsecutiveStores -= NumElem;
17270   }
17271   return MadeChange;
17272 }
17273 
17274 bool DAGCombiner::tryStoreMergeOfExtracts(
17275     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
17276     EVT MemVT, SDNode *RootNode) {
17277   LLVMContext &Context = *DAG.getContext();
17278   const DataLayout &DL = DAG.getDataLayout();
17279   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17280   bool MadeChange = false;
17281 
  // Keep merging while at least two consecutive stores remain.
17283   while (NumConsecutiveStores >= 2) {
17284     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17285     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17286     unsigned FirstStoreAlign = FirstInChain->getAlignment();
17287     unsigned NumStoresToMerge = 1;
17288     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17289       // Find a legal type for the vector store.
17290       unsigned Elts = (i + 1) * NumMemElts;
17291       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
17292       bool IsFast = false;
17293 
17294       // Break early when size is too large to be legal.
17295       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
17296         break;
17297 
17298       if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
17299           TLI.allowsMemoryAccess(Context, DL, Ty,
17300                                  *FirstInChain->getMemOperand(), &IsFast) &&
17301           IsFast)
17302         NumStoresToMerge = i + 1;
17303     }
17304 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
17307     if (NumStoresToMerge < 2) {
17308       // We know that candidate stores are in order and of correct
17309       // shape. While there is no mergeable sequence from the
17310       // beginning one may start later in the sequence. The only
17311       // reason a merge of size N could have failed where another of
17312       // the same size would not have, is if the alignment has
17313       // improved. Drop as many candidates as we can here.
17314       unsigned NumSkip = 1;
17315       while ((NumSkip < NumConsecutiveStores) &&
17316              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
17317         NumSkip++;
17318 
17319       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17320       NumConsecutiveStores -= NumSkip;
17321       continue;
17322     }
17323 
17324     // Check that we can merge these candidates without causing a cycle.
17325     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
17326                                                   RootNode)) {
17327       StoreNodes.erase(StoreNodes.begin(),
17328                        StoreNodes.begin() + NumStoresToMerge);
17329       NumConsecutiveStores -= NumStoresToMerge;
17330       continue;
17331     }
17332 
17333     MadeChange |= mergeStoresOfConstantsOrVecElts(
17334         StoreNodes, MemVT, NumStoresToMerge, false, true, false);
17335 
17336     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17337     NumConsecutiveStores -= NumStoresToMerge;
17338   }
17339   return MadeChange;
17340 }
17341 
17342 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17343                                        unsigned NumConsecutiveStores, EVT MemVT,
17344                                        SDNode *RootNode, bool AllowVectors,
17345                                        bool IsNonTemporalStore,
17346                                        bool IsNonTemporalLoad) {
17347   LLVMContext &Context = *DAG.getContext();
17348   const DataLayout &DL = DAG.getDataLayout();
17349   int64_t ElementSizeBytes = MemVT.getStoreSize();
17350   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17351   bool MadeChange = false;
17352 
17353   // Look for load nodes which are used by the stored values.
17354   SmallVector<MemOpLink, 8> LoadNodes;
17355 
  // Find acceptable loads. Loads must have the same chain (token factor),
  // must not be zext, volatile, or indexed, and they must be consecutive.
17358   BaseIndexOffset LdBasePtr;
17359 
17360   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17361     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17362     SDValue Val = peekThroughBitcasts(St->getValue());
17363     LoadSDNode *Ld = cast<LoadSDNode>(Val);
17364 
17365     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    int64_t LdOffset = 0;
    if (LdBasePtr.getBase().getNode()) {
      // This is not the first pointer we have checked: its base must match
      // the base recorded from the first load.
      if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
        break;
    } else {
      // Remember the first base pointer; all later base pointers must match.
      LdBasePtr = LdPtr;
17375     }
17376 
17377     // We found a potential memory operand to merge.
17378     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17379   }
17380 
17381   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17382     Align RequiredAlignment;
17383     bool NeedRotate = false;
17384     if (LoadNodes.size() == 2) {
17385       // If we have load/store pair instructions and we only have two values,
17386       // don't bother merging.
17387       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17388           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17389         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17390         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17391         break;
17392       }
17393       // If the loads are reversed, see if we can rotate the halves into place.
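      // For example (illustrative): storing the loaded halves {ld p+N, ld p}
      // to {q, q+N} can become one 2*N-byte load from p, a rotate by half the
      // bit width, and one 2*N-byte store to q.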
17394       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17395       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17396       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17397       if (Offset0 - Offset1 == ElementSizeBytes &&
17398           (hasOperation(ISD::ROTL, PairVT) ||
17399            hasOperation(ISD::ROTR, PairVT))) {
17400         std::swap(LoadNodes[0], LoadNodes[1]);
17401         NeedRotate = true;
17402       }
17403     }
17404     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17405     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17406     Align FirstStoreAlign = FirstInChain->getAlign();
17407     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17408 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. This variable holds the index
    // into the load node array.
17412 
17413     unsigned LastConsecutiveLoad = 1;
17414 
    // These variables refer to a size (element count), not an index into the
    // array.
17416     unsigned LastLegalVectorType = 1;
17417     unsigned LastLegalIntegerType = 1;
17418     bool isDereferenceable = true;
17419     bool DoIntegerTruncate = false;
17420     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
17421     SDValue LoadChain = FirstLoad->getChain();
17422     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17423       // All loads must share the same chain.
17424       if (LoadNodes[i].MemNode->getChain() != LoadChain)
17425         break;
17426 
17427       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17428       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17429         break;
17430       LastConsecutiveLoad = i;
17431 
17432       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17433         isDereferenceable = false;
17434 
17435       // Find a legal type for the vector store.
17436       unsigned Elts = (i + 1) * NumMemElts;
17437       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17438 
17439       // Break early when size is too large to be legal.
17440       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17441         break;
17442 
17443       bool IsFastSt = false;
17444       bool IsFastLd = false;
17445       if (TLI.isTypeLegal(StoreTy) &&
17446           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17447           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17448                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17449           IsFastSt &&
17450           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17451                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17452           IsFastLd) {
17453         LastLegalVectorType = i + 1;
17454       }
17455 
17456       // Find a legal type for the integer store.
17457       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17458       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17459       if (TLI.isTypeLegal(StoreTy) &&
17460           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17461           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17462                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17463           IsFastSt &&
17464           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17465                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17466           IsFastLd) {
17467         LastLegalIntegerType = i + 1;
17468         DoIntegerTruncate = false;
        // Or check whether a truncstore and extload are legal.
17470       } else if (TLI.getTypeAction(Context, StoreTy) ==
17471                  TargetLowering::TypePromoteInteger) {
17472         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17473         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17474             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17475             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17476             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17477             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17478             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17479                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
17480             IsFastSt &&
17481             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17482                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
17483             IsFastLd) {
17484           LastLegalIntegerType = i + 1;
17485           DoIntegerTruncate = true;
17486         }
17487       }
17488     }
17489 
17490     // Only use vector types if the vector type is larger than the integer
17491     // type. If they are the same, use integers.
17492     bool UseVectorTy =
17493         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17494     unsigned LastLegalType =
17495         std::max(LastLegalVectorType, LastLegalIntegerType);
17496 
    // We add +1 here because the LastXXX variables refer to a location
    // (index + 1) while NumElem refers to an element count.
17499     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17500     NumElem = std::min(LastLegalType, NumElem);
17501     Align FirstLoadAlign = FirstLoad->getAlign();
17502 
17503     if (NumElem < 2) {
17504       // We know that candidate stores are in order and of correct
17505       // shape. While there is no mergeable sequence from the
17506       // beginning one may start later in the sequence. The only
17507       // reason a merge of size N could have failed where another of
17508       // the same size would not have is if the alignment or either
17509       // the load or store has improved. Drop as many candidates as we
17510       // can here.
17511       unsigned NumSkip = 1;
17512       while ((NumSkip < LoadNodes.size()) &&
17513              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17514              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17515         NumSkip++;
17516       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17517       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17518       NumConsecutiveStores -= NumSkip;
17519       continue;
17520     }
17521 
17522     // Check that we can merge these candidates without causing a cycle.
17523     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17524                                                   RootNode)) {
17525       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17526       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17527       NumConsecutiveStores -= NumElem;
17528       continue;
17529     }
17530 
17531     // Find if it is better to use vectors or integers to load and store
17532     // to memory.
17533     EVT JointMemOpVT;
17534     if (UseVectorTy) {
17535       // Find a legal type for the vector store.
17536       unsigned Elts = NumElem * NumMemElts;
17537       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17538     } else {
17539       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17540       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17541     }
17542 
17543     SDLoc LoadDL(LoadNodes[0].MemNode);
17544     SDLoc StoreDL(StoreNodes[0].MemNode);
17545 
17546     // The merged loads are required to have the same incoming chain, so
17547     // using the first's chain is acceptable.
17548 
17549     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17550     AddToWorklist(NewStoreChain.getNode());
17551 
17552     MachineMemOperand::Flags LdMMOFlags =
17553         isDereferenceable ? MachineMemOperand::MODereferenceable
17554                           : MachineMemOperand::MONone;
17555     if (IsNonTemporalLoad)
17556       LdMMOFlags |= MachineMemOperand::MONonTemporal;
17557 
17558     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17559                                               ? MachineMemOperand::MONonTemporal
17560                                               : MachineMemOperand::MONone;
17561 
17562     SDValue NewLoad, NewStore;
17563     if (UseVectorTy || !DoIntegerTruncate) {
17564       NewLoad = DAG.getLoad(
17565           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17566           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17567       SDValue StoreOp = NewLoad;
17568       if (NeedRotate) {
17569         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17570         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17571                "Unexpected type for rotate-able load pair");
17572         SDValue RotAmt =
17573             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17574         // Target can convert to the identical ROTR if it does not have ROTL.
17575         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17576       }
17577       NewStore = DAG.getStore(
17578           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17579           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17580     } else { // This must be the truncstore/extload case
17581       EVT ExtendedTy =
17582           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17583       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17584                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
17585                                FirstLoad->getPointerInfo(), JointMemOpVT,
17586                                FirstLoadAlign, LdMMOFlags);
17587       NewStore = DAG.getTruncStore(
17588           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17589           FirstInChain->getPointerInfo(), JointMemOpVT,
17590           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17591     }
17592 
17593     // Transfer chain users from old loads to the new load.
17594     for (unsigned i = 0; i < NumElem; ++i) {
17595       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17596       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17597                                     SDValue(NewLoad.getNode(), 1));
17598     }
17599 
17600     // Replace all stores with the new store. Recursively remove corresponding
17601     // values if they are no longer used.
17602     for (unsigned i = 0; i < NumElem; ++i) {
17603       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17604       CombineTo(StoreNodes[i].MemNode, NewStore);
17605       if (Val.getNode()->use_empty())
17606         recursivelyDeleteUnusedNodes(Val.getNode());
17607     }
17608 
17609     MadeChange = true;
17610     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17611     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17612     NumConsecutiveStores -= NumElem;
17613   }
17614   return MadeChange;
17615 }
17616 
17617 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17618   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17619     return false;
17620 
17621   // TODO: Extend this function to merge stores of scalable vectors.
17622   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17623   // store since we know <vscale x 16 x i8> is exactly twice as large as
17624   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17625   EVT MemVT = St->getMemoryVT();
17626   if (MemVT.isScalableVector())
17627     return false;
17628   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17629     return false;
17630 
17631   // This function cannot currently deal with non-byte-sized memory sizes.
17632   int64_t ElementSizeBytes = MemVT.getStoreSize();
17633   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17634     return false;
17635 
17636   // Do not bother looking at stored values that are not constants, loads, or
17637   // extracted vector elements.
17638   SDValue StoredVal = peekThroughBitcasts(St->getValue());
17639   const StoreSource StoreSrc = getStoreSource(StoredVal);
17640   if (StoreSrc == StoreSource::Unknown)
17641     return false;
17642 
17643   SmallVector<MemOpLink, 8> StoreNodes;
17644   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
17646   getStoreMergeCandidates(St, StoreNodes, RootNode);
17647 
17648   // Check if there is anything to merge.
17649   if (StoreNodes.size() < 2)
17650     return false;
17651 
17652   // Sort the memory operands according to their distance from the
17653   // base pointer.
17654   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17655     return LHS.OffsetFromBase < RHS.OffsetFromBase;
17656   });
17657 
17658   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17659       Attribute::NoImplicitFloat);
17660   bool IsNonTemporalStore = St->isNonTemporal();
17661   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17662                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
17663 
  // Store merging attempts to merge the lowest stores first. This generally
  // works out, since on success the remaining stores are checked after the
  // first collection of stores is merged. However, if a non-mergeable store
  // is found first, e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would fail and
  // miss the subsequent mergeable cases. To prevent this, we prune such
  // stores from the front of StoreNodes here.
17671   bool MadeChange = false;
17672   while (StoreNodes.size() > 1) {
17673     unsigned NumConsecutiveStores =
17674         getConsecutiveStores(StoreNodes, ElementSizeBytes);
17675     // There are no more stores in the list to examine.
17676     if (NumConsecutiveStores == 0)
17677       return MadeChange;
17678 
17679     // We have at least 2 consecutive stores. Try to merge them.
17680     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17681     switch (StoreSrc) {
17682     case StoreSource::Constant:
17683       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17684                                              MemVT, RootNode, AllowVectors);
17685       break;
17686 
17687     case StoreSource::Extract:
17688       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17689                                             MemVT, RootNode);
17690       break;
17691 
17692     case StoreSource::Load:
17693       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17694                                          MemVT, RootNode, AllowVectors,
17695                                          IsNonTemporalStore, IsNonTemporalLoad);
17696       break;
17697 
17698     default:
17699       llvm_unreachable("Unhandled store source type");
17700     }
17701   }
17702   return MadeChange;
17703 }
17704 
17705 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17706   SDLoc SL(ST);
17707   SDValue ReplStore;
17708 
17709   // Replace the chain to avoid dependency.
17710   if (ST->isTruncatingStore()) {
17711     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17712                                   ST->getBasePtr(), ST->getMemoryVT(),
17713                                   ST->getMemOperand());
17714   } else {
17715     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17716                              ST->getMemOperand());
17717   }
17718 
17719   // Create token to keep both nodes around.
17720   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17721                               MVT::Other, ST->getChain(), ReplStore);
17722 
17723   // Make sure the new and old chains are cleaned up.
17724   AddToWorklist(Token.getNode());
17725 
17726   // Don't add users to work list.
17727   return CombineTo(ST, Token, false);
17728 }
17729 
17730 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17731   SDValue Value = ST->getValue();
17732   if (Value.getOpcode() == ISD::TargetConstantFP)
17733     return SDValue();
17734 
17735   if (!ISD::isNormalStore(ST))
17736     return SDValue();
17737 
17738   SDLoc DL(ST);
17739 
17740   SDValue Chain = ST->getChain();
17741   SDValue Ptr = ST->getBasePtr();
17742 
17743   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17744 
17745   // NOTE: If the original store is volatile, this transform must not increase
17746   // the number of stores.  For example, on x86-32 an f64 can be stored in one
17747   // processor operation but an i64 (which is not legal) requires two.  So the
17748   // transform should not be done in this case.
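  // For example (illustrative): an f64 store of the constant 1.0 can instead
  // store its bit pattern 0x3FF0000000000000 as an i64, or as two i32 halves
  // when only i32 stores are legal.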
17749 
17750   SDValue Tmp;
17751   switch (CFP->getSimpleValueType(0).SimpleTy) {
17752   default:
17753     llvm_unreachable("Unknown FP type");
17754   case MVT::f16:    // We don't do this for these yet.
17755   case MVT::f80:
17756   case MVT::f128:
17757   case MVT::ppcf128:
17758     return SDValue();
17759   case MVT::f32:
17760     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17761         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      Tmp = DAG.getConstant(
          (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
          SDLoc(CFP), MVT::i32);
17766       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17767     }
17768 
17769     return SDValue();
17770   case MVT::f64:
17771     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17772          ST->isSimple()) ||
17773         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
                            SDLoc(CFP), MVT::i64);
17777       return DAG.getStore(Chain, DL, Tmp,
17778                           Ptr, ST->getMemOperand());
17779     }
17780 
17781     if (ST->isSimple() &&
17782         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17783       // Many FP stores are not made apparent until after legalize, e.g. for
17784       // argument passing.  Since this is so common, custom legalize the
17785       // 64-bit integer store into two 32-bit stores.
17786       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17787       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17788       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
17789       if (DAG.getDataLayout().isBigEndian())
17790         std::swap(Lo, Hi);
17791 
17792       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17793       AAMDNodes AAInfo = ST->getAAInfo();
17794 
17795       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17796                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17797       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17798       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17799                                  ST->getPointerInfo().getWithOffset(4),
17800                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17801       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17802                          St0, St1);
17803     }
17804 
17805     return SDValue();
17806   }
17807 }
17808 
17809 SDValue DAGCombiner::visitSTORE(SDNode *N) {
17810   StoreSDNode *ST  = cast<StoreSDNode>(N);
17811   SDValue Chain = ST->getChain();
17812   SDValue Value = ST->getValue();
17813   SDValue Ptr   = ST->getBasePtr();
17814 
17815   // If this is a store of a bit convert, store the input value if the
17816   // resultant store does not need a higher alignment than the original.
17817   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17818       ST->isUnindexed()) {
17819     EVT SVT = Value.getOperand(0).getValueType();
17820     // If the store is volatile, we only want to change the store type if the
17821     // resulting store is legal. Otherwise we might increase the number of
17822     // memory accesses. We don't care if the original type was legal or not
17823     // as we assume software couldn't rely on the number of accesses of an
17824     // illegal type.
17825     // TODO: May be able to relax for unordered atomics (see D66309)
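    // For example (illustrative): "store (i32 bitcast (f32 %x)), p" can become
    // "store f32 %x, p", avoiding a cross-domain move on some targets.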
17826     if (((!LegalOperations && ST->isSimple()) ||
17827          TLI.isOperationLegal(ISD::STORE, SVT)) &&
17828         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17829                                      DAG, *ST->getMemOperand())) {
17830       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17831                           ST->getMemOperand());
17832     }
17833   }
17834 
17835   // Turn 'store undef, Ptr' -> nothing.
17836   if (Value.isUndef() && ST->isUnindexed())
17837     return Chain;
17838 
17839   // Try to infer better alignment information than the store already has.
17840   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17841     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17842       if (*Alignment > ST->getAlign() &&
17843           isAligned(*Alignment, ST->getSrcValueOffset())) {
17844         SDValue NewStore =
17845             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17846                               ST->getMemoryVT(), *Alignment,
17847                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
17849         assert(NewStore.getNode() == N);
17850         (void)NewStore;
17851       }
17852     }
17853   }
17854 
17855   // Try transforming a pair floating point load / store ops to integer
17856   // load / store ops.
17857   if (SDValue NewST = TransformFPLoadStorePair(N))
17858     return NewST;
17859 
17860   // Try transforming several stores into STORE (BSWAP).
17861   if (SDValue Store = mergeTruncStores(ST))
17862     return Store;
17863 
17864   if (ST->isUnindexed()) {
17865     // Walk up chain skipping non-aliasing memory nodes, on this store and any
17866     // adjacent stores.
17867     if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node so nothing else is done.
17870       return SDValue(ST, 0);
17871     }
17872     Chain = ST->getChain();
17873   }
17874 
17875   // FIXME: is there such a thing as a truncating indexed store?
17876   if (ST->isTruncatingStore() && ST->isUnindexed() &&
17877       Value.getValueType().isInteger() &&
17878       (!isa<ConstantSDNode>(Value) ||
17879        !cast<ConstantSDNode>(Value)->isOpaque())) {
17880     APInt TruncDemandedBits =
17881         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17882                              ST->getMemoryVT().getScalarSizeInBits());
17883 
17884     // See if we can simplify the input to this truncstore with knowledge that
17885     // only the low bits are being used.  For example:
17886     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
17887     AddToWorklist(Value.getNode());
17888     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17889       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17890                                ST->getMemOperand());
17891 
17892     // Otherwise, see if we can simplify the operation with
17893     // SimplifyDemandedBits, which only works if the value has a single use.
17894     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will add
      // Value's node back to the worklist if necessary, but we also need to
      // re-visit the Store node itself.
17899       if (N->getOpcode() != ISD::DELETED_NODE)
17900         AddToWorklist(N);
17901       return SDValue(N, 0);
17902     }
17903   }
17904 
17905   // If this is a load followed by a store to the same location, then the store
17906   // is dead/noop.
17907   // TODO: Can relax for unordered atomics (see D66309)
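  // For example: (store (load p), p) writes back the value already in memory;
  // such a store is a no-op and is replaced by its chain.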
17908   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17909     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17910         ST->isUnindexed() && ST->isSimple() &&
17911         // There can't be any side effects between the load and store, such as
17912         // a call or store.
17913         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17914       // The store is dead, remove it.
17915       return Chain;
17916     }
17917   }
17918 
17919   // TODO: Can relax for unordered atomics (see D66309)
17920   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17921     if (ST->isUnindexed() && ST->isSimple() &&
17922         ST1->isUnindexed() && ST1->isSimple()) {
17923       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17924           ST->getMemoryVT() == ST1->getMemoryVT()) {
17925         // If this is a store followed by a store with the same value to the
17926         // same location, then the store is dead/noop.
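        // For example: two back-to-back (st X, p) nodes; the later one is
        // redundant and is replaced by its chain.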
17927         return Chain;
17928       }
17929 
17930       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17931           !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below require knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
17934           !ST->getMemoryVT().isScalableVector() &&
17935           !ST1->getMemoryVT().isScalableVector()) {
17936         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17937         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17938         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
17939         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store writes to a subset of the current store's
        // location and no other node is chained to that store, we can
        // effectively drop the preceding store. Do not remove stores to undef
        // as they may be used as data sinks.
17944         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17945           CombineTo(ST1, ST1->getChain());
17946           return SDValue();
17947         }
17948       }
17949     }
17950   }
17951 
17952   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17953   // truncating store.  We can do this even if this is already a truncstore.
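  // For example: (store i16 (trunc i32:X), p) can become
  // (truncstore i32:X, p, i16) when the target reports that truncstore legal.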
17954   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17955       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17956       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17957                             ST->getMemoryVT())) {
17958     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17959                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
17960   }
17961 
17962   // Always perform this optimization before types are legal. If the target
17963   // prefers, also try this after legalization to catch stores that were created
17964   // by intrinsics or other nodes.
17965   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17966     while (true) {
17967       // There can be multiple store sequences on the same chain.
17968       // Keep trying to merge store sequences until we are unable to do so
17969       // or until we merge the last store on the chain.
17970       bool Changed = mergeConsecutiveStores(ST);
17971       if (!Changed) break;
      // Return N, as the merge only uses CombineTo and no worklist
      // cleanup is necessary.
17974       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17975         return SDValue(N, 0);
17976     }
17977   }
17978 
17979   // Try transforming N to an indexed store.
17980   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17981     return SDValue(N, 0);
17982 
  // Turn 'store float 1.0, Ptr' -> 'store int 0x3f800000, Ptr'
17984   //
17985   // Make sure to do this only after attempting to merge stores in order to
17986   //  avoid changing the types of some subset of stores due to visit order,
17987   //  preventing their merging.
17988   if (isa<ConstantFPSDNode>(ST->getValue())) {
17989     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17990       return NewSt;
17991   }
17992 
17993   if (SDValue NewSt = splitMergedValStore(ST))
17994     return NewSt;
17995 
17996   return ReduceLoadOpStoreWidth(N);
17997 }
17998 
17999 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
18000   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
18001   if (!LifetimeEnd->hasOffset())
18002     return SDValue();
18003 
18004   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
18005                                         LifetimeEnd->getOffset(), false);
18006 
18007   // We walk up the chains to find stores.
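  // For example (illustrative):
  //   (store x, %slot)      ; reached only through this chain
  //   (lifetime_end %slot)
  // If the store lies entirely within %slot's bounds, it can never be
  // observed and is removed below.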
18008   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
18009   while (!Chains.empty()) {
18010     SDValue Chain = Chains.pop_back_val();
18011     if (!Chain.hasOneUse())
18012       continue;
18013     switch (Chain.getOpcode()) {
18014     case ISD::TokenFactor:
18015       for (unsigned Nops = Chain.getNumOperands(); Nops;)
18016         Chains.push_back(Chain.getOperand(--Nops));
18017       break;
18018     case ISD::LIFETIME_START:
18019     case ISD::LIFETIME_END:
18020       // We can forward past any lifetime start/end that can be proven not to
18021       // alias the node.
18022       if (!isAlias(Chain.getNode(), N))
18023         Chains.push_back(Chain.getOperand(0));
18024       break;
18025     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
18027       // TODO: Can relax for unordered atomics (see D66309)
18028       if (!ST->isSimple() || ST->isIndexed())
18029         continue;
18030       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
18031       // The bounds of a scalable store are not known until runtime, so this
18032       // store cannot be elided.
18033       if (StoreSize.isScalable())
18034         continue;
18035       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
18036       // If we store purely within object bounds just before its lifetime ends,
18037       // we can remove the store.
18038       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
18039                                    StoreSize.getFixedSize() * 8)) {
18040         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
18041                    dbgs() << "\nwithin LIFETIME_END of : ";
18042                    LifetimeEndBase.dump(); dbgs() << "\n");
18043         CombineTo(ST, ST->getChain());
18044         return SDValue(N, 0);
18045       }
18046     }
18047     }
18048   }
18049   return SDValue();
18050 }
18051 
/// In the store instruction sequence below, the F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
/// which can remove the bitwise instructions or sink them to colder places.
18056 ///
18057 ///   (store (or (zext (bitcast F to i32) to i64),
18058 ///              (shl (zext I to i64), 32)), addr)  -->
18059 ///   (store F, addr) and (store I, addr+4)
18060 ///
/// Similarly, splitting other merged stores can also be beneficial, e.g.:
18062 /// For pair of {i32, i32}, i64 store --> two i32 stores.
18063 /// For pair of {i32, i16}, i64 store --> two i32 stores.
18064 /// For pair of {i16, i16}, i32 store --> two i16 stores.
18065 /// For pair of {i16, i8},  i32 store --> two i16 stores.
18066 /// For pair of {i8, i8},   i16 store --> two i8 stores.
18067 ///
18068 /// We allow each target to determine specifically which kind of splitting is
18069 /// supported.
18070 ///
/// These store patterns are commonly seen in the simple code snippet below
/// when only std::make_pair(...) is SROA-transformed before being inlined
/// into hoo.
18073 ///   void goo(const std::pair<int, float> &);
18074 ///   hoo() {
18075 ///     ...
18076 ///     goo(std::make_pair(tmp, ftmp));
18077 ///     ...
18078 ///   }
18079 ///
18080 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
18081   if (OptLevel == CodeGenOpt::None)
18082     return SDValue();
18083 
18084   // Can't change the number of memory accesses for a volatile store or break
18085   // atomicity for an atomic one.
18086   if (!ST->isSimple())
18087     return SDValue();
18088 
18089   SDValue Val = ST->getValue();
18090   SDLoc DL(ST);
18091 
18092   // Match OR operand.
18093   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
18094     return SDValue();
18095 
18096   // Match SHL operand and get Lower and Higher parts of Val.
18097   SDValue Op1 = Val.getOperand(0);
18098   SDValue Op2 = Val.getOperand(1);
18099   SDValue Lo, Hi;
18100   if (Op1.getOpcode() != ISD::SHL) {
18101     std::swap(Op1, Op2);
18102     if (Op1.getOpcode() != ISD::SHL)
18103       return SDValue();
18104   }
18105   Lo = Op2;
18106   Hi = Op1.getOperand(0);
18107   if (!Op1.hasOneUse())
18108     return SDValue();
18109 
18110   // Match shift amount to HalfValBitSize.
18111   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
18112   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
18113   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
18114     return SDValue();
18115 
  // Lo and Hi are zero-extended from scalar integers whose sizes are no
  // larger than half of Val's bit width.
18118   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
18119       !Lo.getOperand(0).getValueType().isScalarInteger() ||
18120       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
18121       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
18122       !Hi.getOperand(0).getValueType().isScalarInteger() ||
18123       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
18124     return SDValue();
18125 
  // Use the EVTs of the low and high parts before any bitcast as the inputs
  // to the target query.
18128   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
18129                   ? Lo.getOperand(0).getValueType()
18130                   : Lo.getValueType();
18131   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
18132                    ? Hi.getOperand(0).getValueType()
18133                    : Hi.getValueType();
18134   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
18135     return SDValue();
18136 
18137   // Start to split store.
18138   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
18139   AAMDNodes AAInfo = ST->getAAInfo();
18140 
18141   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
18142   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
18143   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
18144   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
18145 
18146   SDValue Chain = ST->getChain();
18147   SDValue Ptr = ST->getBasePtr();
18148   // Lower value store.
18149   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
18150                              ST->getOriginalAlign(), MMOFlags, AAInfo);
18151   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
18152   // Higher value store.
18153   SDValue St1 = DAG.getStore(
18154       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
18155       ST->getOriginalAlign(), MMOFlags, AAInfo);
18156   return St1;
18157 }
18158 
/// Convert a disguised subvector insertion into a shuffle.
18160 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
18163   SDValue InsertVal = N->getOperand(1);
18164   SDValue Vec = N->getOperand(0);
18165 
18166   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
18167   // InsIndex)
18168   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
18169   //   CONCAT_VECTORS.
18170   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
18171       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18172       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
18173     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
18174     ArrayRef<int> Mask = SVN->getMask();
18175 
18176     SDValue X = Vec.getOperand(0);
18177     SDValue Y = Vec.getOperand(1);
18178 
18179     // Vec's operand 0 is using indices from 0 to N-1 and
18180     // operand 1 from N to 2N - 1, where N is the number of
18181     // elements in the vectors.
18182     SDValue InsertVal0 = InsertVal.getOperand(0);
18183     int ElementOffset = -1;
18184 
18185     // We explore the inputs of the shuffle in order to see if we find the
18186     // source of the extract_vector_elt. If so, we can use it to modify the
18187     // shuffle rather than perform an insert_vector_elt.
18188     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
18189     ArgWorkList.emplace_back(Mask.size(), Y);
18190     ArgWorkList.emplace_back(0, X);
18191 
18192     while (!ArgWorkList.empty()) {
18193       int ArgOffset;
18194       SDValue ArgVal;
18195       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
18196 
18197       if (ArgVal == InsertVal0) {
18198         ElementOffset = ArgOffset;
18199         break;
18200       }
18201 
      // Peek through concat_vectors.
18203       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
18204         int CurrentArgOffset =
18205             ArgOffset + ArgVal.getValueType().getVectorNumElements();
18206         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
18207         for (SDValue Op : reverse(ArgVal->ops())) {
18208           CurrentArgOffset -= Step;
18209           ArgWorkList.emplace_back(CurrentArgOffset, Op);
18210         }
18211 
18212         // Make sure we went through all the elements and did not screw up index
18213         // computation.
18214         assert(CurrentArgOffset == ArgOffset);
18215       }
18216     }
18217 
18218     if (ElementOffset != -1) {
18219       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
18220 
18221       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
18222       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
18223       assert(NewMask[InsIndex] <
18224                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
18225              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
18226 
18227       SDValue LegalShuffle =
18228               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
18229                                           Y, NewMask, DAG);
18230       if (LegalShuffle)
18231         return LegalShuffle;
18232     }
18233   }
18234 
18235   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
18236   // bitcast(shuffle (bitcast V), (extended X), Mask)
18237   // Note: We do not use an insert_subvector node because that requires a
18238   // legal subvector type.
18239   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
18240       !InsertVal.getOperand(0).getValueType().isVector())
18241     return SDValue();
18242 
18243   SDValue SubVec = InsertVal.getOperand(0);
18244   SDValue DestVec = N->getOperand(0);
18245   EVT SubVecVT = SubVec.getValueType();
18246   EVT VT = DestVec.getValueType();
18247   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating a
  // new vector from it is likely to exceed the cost of an insert_vector_elt.
18250   if (NumSrcElts == 1)
18251     return SDValue();
18252   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
18253   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
18254 
18255   // Step 1: Create a shuffle mask that implements this insert operation. The
18256   // vector that we are inserting into will be operand 0 of the shuffle, so
18257   // those elements are just 'i'. The inserted subvector is in the first
18258   // positions of operand 1 of the shuffle. Example:
18259   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
18260   SmallVector<int, 16> Mask(NumMaskVals);
18261   for (unsigned i = 0; i != NumMaskVals; ++i) {
18262     if (i / NumSrcElts == InsIndex)
18263       Mask[i] = (i % NumSrcElts) + NumMaskVals;
18264     else
18265       Mask[i] = i;
18266   }
18267 
  // Bail out if the target cannot handle the shuffle we want to create.
18269   EVT SubVecEltVT = SubVecVT.getVectorElementType();
18270   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
18271   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
18272     return SDValue();
18273 
18274   // Step 2: Create a wide vector from the inserted source vector by appending
18275   // undefined elements. This is the same size as our destination vector.
18276   SDLoc DL(N);
18277   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
18278   ConcatOps[0] = SubVec;
18279   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
18280 
18281   // Step 3: Shuffle in the padded subvector.
18282   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
18283   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
18284   AddToWorklist(PaddedSubV.getNode());
18285   AddToWorklist(DestVecBC.getNode());
18286   AddToWorklist(Shuf.getNode());
18287   return DAG.getBitcast(VT, Shuf);
18288 }
18289 
18290 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
18291   SDValue InVec = N->getOperand(0);
18292   SDValue InVal = N->getOperand(1);
18293   SDValue EltNo = N->getOperand(2);
18294   SDLoc DL(N);
18295 
18296   EVT VT = InVec.getValueType();
18297   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
18298 
  // Inserting into an out-of-bounds element is undefined.
18300   if (IndexC && VT.isFixedLengthVector() &&
18301       IndexC->getZExtValue() >= VT.getVectorNumElements())
18302     return DAG.getUNDEF(VT);
18303 
18304   // Remove redundant insertions:
18305   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
18306   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18307       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
18308     return InVec;
18309 
18310   if (!IndexC) {
    // If this is a variable insert into an undef vector, it might be better
    // to splat:
    // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
18313     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
18314       if (VT.isScalableVector())
18315         return DAG.getSplatVector(VT, DL, InVal);
18316       else {
18317         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
18318         return DAG.getBuildVector(VT, DL, Ops);
18319       }
18320     }
18321     return SDValue();
18322   }
18323 
18324   if (VT.isScalableVector())
18325     return SDValue();
18326 
18327   unsigned NumElts = VT.getVectorNumElements();
18328 
18329   // We must know which element is being inserted for folds below here.
18330   unsigned Elt = IndexC->getZExtValue();
18331   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18332     return Shuf;
18333 
  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, V0, Idx0), V1, Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, V1, Idx1), V0, Idx0)
18338   //
18339   // Do this only if the child insert_vector node has one use; also
18340   // do this only if indices are both constants and Idx1 < Idx0.
18341   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18342       && isa<ConstantSDNode>(InVec.getOperand(2))) {
18343     unsigned OtherElt = InVec.getConstantOperandVal(2);
18344     if (Elt < OtherElt) {
18345       // Swap nodes.
18346       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18347                                   InVec.getOperand(0), InVal, EltNo);
18348       AddToWorklist(NewOp.getNode());
18349       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18350                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18351     }
18352   }
18353 
18354   // If we can't generate a legal BUILD_VECTOR, exit
18355   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18356     return SDValue();
18357 
18358   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18359   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
18360   // vector elements.
18361   SmallVector<SDValue, 8> Ops;
18362   // Do not combine these two vectors if the output vector will not replace
18363   // the input vector.
18364   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18365     Ops.append(InVec.getNode()->op_begin(),
18366                InVec.getNode()->op_end());
18367   } else if (InVec.isUndef()) {
18368     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18369   } else {
18370     return SDValue();
18371   }
18372   assert(Ops.size() == NumElts && "Unexpected vector size");
18373 
18374   // Insert the element
18375   if (Elt < Ops.size()) {
18376     // All the operands of BUILD_VECTOR must have the same type;
18377     // we enforce that here.
18378     EVT OpVT = Ops[0].getValueType();
18379     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18380   }
18381 
18382   // Return the new vector
18383   return DAG.getBuildVector(VT, DL, Ops);
18384 }
18385 
18386 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18387                                                   SDValue EltNo,
18388                                                   LoadSDNode *OriginalLoad) {
18389   assert(OriginalLoad->isSimple());
18390 
18391   EVT ResultVT = EVE->getValueType(0);
18392   EVT VecEltVT = InVecVT.getVectorElementType();
18393 
18394   // If the vector element type is not a multiple of a byte then we are unable
18395   // to correctly compute an address to load only the extracted element as a
18396   // scalar.
18397   if (!VecEltVT.isByteSized())
18398     return SDValue();
18399 
18400   Align Alignment = OriginalLoad->getAlign();
18401   Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18402       VecEltVT.getTypeForEVT(*DAG.getContext()));
18403 
18404   if (NewAlign > Alignment ||
18405       !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18406     return SDValue();
18407 
18408   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18409     ISD::NON_EXTLOAD : ISD::EXTLOAD;
18410   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18411     return SDValue();
18412 
18413   Alignment = NewAlign;
18414 
18415   SDValue NewPtr = OriginalLoad->getBasePtr();
18416   SDValue Offset;
18417   EVT PtrType = NewPtr.getValueType();
18418   MachinePointerInfo MPI;
18419   SDLoc DL(EVE);
18420   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18421     int Elt = ConstEltNo->getZExtValue();
18422     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18423     Offset = DAG.getConstant(PtrOff, DL, PtrType);
18424     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18425   } else {
18426     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
18427     Offset = DAG.getNode(
18428         ISD::MUL, DL, PtrType, Offset,
18429         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
18430     // Discard the pointer info except the address space because the memory
18431     // operand can't represent this new access since the offset is variable.
18432     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18433   }
18434   NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
18435 
18436   // The replacement we need to do here is a little tricky: we need to
18437   // replace an extractelement of a load with a load.
18438   // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractelement is the only
18440   // use of the load; that's okay because we don't want to perform this
18441   // transformation in other cases anyway.
18442   SDValue Load;
18443   SDValue Chain;
18444   if (ResultVT.bitsGT(VecEltVT)) {
18445     // If the result type of vextract is wider than the load, then issue an
18446     // extending load instead.
18447     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18448                                                   VecEltVT)
18449                                    ? ISD::ZEXTLOAD
18450                                    : ISD::EXTLOAD;
18451     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18452                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18453                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
18454                           OriginalLoad->getAAInfo());
18455     Chain = Load.getValue(1);
18456   } else {
18457     Load = DAG.getLoad(
18458         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18459         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18460     Chain = Load.getValue(1);
18461     if (ResultVT.bitsLT(VecEltVT))
18462       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18463     else
18464       Load = DAG.getBitcast(ResultVT, Load);
18465   }
18466   WorklistRemover DeadNodes(*this);
18467   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18468   SDValue To[] = { Load, Chain };
18469   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18470   // Make sure to revisit this node to clean it up; it will usually be dead.
18471   AddToWorklist(EVE);
18472   // Since we're explicitly calling ReplaceAllUses, add the new node to the
18473   // worklist explicitly as well.
18474   AddToWorklistWithUsers(Load.getNode());
18475   ++OpsNarrowed;
18476   return SDValue(EVE, 0);
18477 }
18478 
18479 /// Transform a vector binary operation into a scalar binary operation by moving
18480 /// the math/logic after an extract element of a vector.
18481 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18482                                        bool LegalOperations) {
18483   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18484   SDValue Vec = ExtElt->getOperand(0);
18485   SDValue Index = ExtElt->getOperand(1);
18486   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18487   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18488       Vec.getNode()->getNumValues() != 1)
18489     return SDValue();
18490 
18491   // Targets may want to avoid this to prevent an expensive register transfer.
18492   if (!TLI.shouldScalarizeBinop(Vec))
18493     return SDValue();
18494 
18495   // Extracting an element of a vector constant is constant-folded, so this
18496   // transform is just replacing a vector op with a scalar op while moving the
18497   // extract.
18498   SDValue Op0 = Vec.getOperand(0);
18499   SDValue Op1 = Vec.getOperand(1);
18500   if (isAnyConstantBuildVector(Op0, true) ||
18501       isAnyConstantBuildVector(Op1, true)) {
18502     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18503     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
18504     SDLoc DL(ExtElt);
18505     EVT VT = ExtElt->getValueType(0);
18506     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18507     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18508     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18509   }
18510 
18511   return SDValue();
18512 }
18513 
18514 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18515   SDValue VecOp = N->getOperand(0);
18516   SDValue Index = N->getOperand(1);
18517   EVT ScalarVT = N->getValueType(0);
18518   EVT VecVT = VecOp.getValueType();
18519   if (VecOp.isUndef())
18520     return DAG.getUNDEF(ScalarVT);
18521 
  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18523   //
18524   // This only really matters if the index is non-constant since other combines
18525   // on the constant elements already work.
18526   SDLoc DL(N);
18527   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18528       Index == VecOp.getOperand(2)) {
18529     SDValue Elt = VecOp.getOperand(1);
18530     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18531   }
18532 
  // (vextract (scalar_to_vector val), 0) -> val
18534   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18535     // Only 0'th element of SCALAR_TO_VECTOR is defined.
18536     if (DAG.isKnownNeverZero(Index))
18537       return DAG.getUNDEF(ScalarVT);
18538 
18539     // Check if the result type doesn't match the inserted element type. A
18540     // SCALAR_TO_VECTOR may truncate the inserted element and the
18541     // EXTRACT_VECTOR_ELT may widen the extracted vector.
18542     SDValue InOp = VecOp.getOperand(0);
18543     if (InOp.getValueType() != ScalarVT) {
18544       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18545       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18546     }
18547     return InOp;
18548   }
18549 
18550   // extract_vector_elt of out-of-bounds element -> UNDEF
18551   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18552   if (IndexC && VecVT.isFixedLengthVector() &&
18553       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18554     return DAG.getUNDEF(ScalarVT);
18555 
18556   // extract_vector_elt (build_vector x, y), 1 -> y
18557   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18558        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18559       TLI.isTypeLegal(VecVT) &&
18560       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18561     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18562             VecVT.isFixedLengthVector()) &&
18563            "BUILD_VECTOR used for scalable vectors");
18564     unsigned IndexVal =
18565         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18566     SDValue Elt = VecOp.getOperand(IndexVal);
18567     EVT InEltVT = Elt.getValueType();
18568 
    // Sometimes a build_vector's scalar input types do not match the result
    // type.
18570     if (ScalarVT == InEltVT)
18571       return Elt;
18572 
    // TODO: It may be useful to truncate, if the truncation is free, when the
    // build_vector implicitly converts.
18575   }
18576 
18577   if (VecVT.isScalableVector())
18578     return SDValue();
18579 
18580   // All the code from this point onwards assumes fixed width vectors, but it's
18581   // possible that some of the combinations could be made to work for scalable
18582   // vectors too.
18583   unsigned NumElts = VecVT.getVectorNumElements();
18584   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18585 
18586   // TODO: These transforms should not require the 'hasOneUse' restriction, but
18587   // there are regressions on multiple targets without it. We can end up with a
18588   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18589   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18590       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
18592     bool IsLE = DAG.getDataLayout().isLittleEndian();
18593     unsigned ExtractIndex = IndexC->getZExtValue();
18594     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18595     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
18596     SDValue BCSrc = VecOp.getOperand(0);
18597     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18598       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18599 
18600     if (LegalTypes && BCSrc.getValueType().isInteger() &&
18601         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18602       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18603       // trunc i64 X to i32
18604       SDValue X = BCSrc.getOperand(0);
18605       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18606              "Extract element and scalar to vector can't change element type "
18607              "from FP to integer.");
18608       unsigned XBitWidth = X.getValueSizeInBits();
18609       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18610 
18611       // An extract element return value type can be wider than its vector
18612       // operand element type. In that case, the high bits are undefined, so
18613       // it's possible that we may need to extend rather than truncate.
18614       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18615         assert(XBitWidth % VecEltBitWidth == 0 &&
18616                "Scalar bitwidth must be a multiple of vector element bitwidth");
18617         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18618       }
18619     }
18620   }
18621 
18622   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18623     return BO;
18624 
  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example, on AVX, extracting an element from a wide vector
  // without using extract_subvector would require such an instruction.
  // However, if we can find an underlying scalar value, then we can always use
  // that.
18631   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18632     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18633     // Find the new index to extract from.
18634     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18635 
18636     // Extracting an undef index is undef.
18637     if (OrigElt == -1)
18638       return DAG.getUNDEF(ScalarVT);
18639 
18640     // Select the right vector half to extract from.
18641     SDValue SVInVec;
18642     if (OrigElt < (int)NumElts) {
18643       SVInVec = VecOp.getOperand(0);
18644     } else {
18645       SVInVec = VecOp.getOperand(1);
18646       OrigElt -= NumElts;
18647     }
18648 
18649     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18650       SDValue InOp = SVInVec.getOperand(OrigElt);
18651       if (InOp.getValueType() != ScalarVT) {
18652         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18653         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18654       }
18655 
18656       return InOp;
18657     }
18658 
18659     // FIXME: We should handle recursing on other vector shuffles and
18660     // scalar_to_vector here as well.
18661 
18662     if (!LegalOperations ||
18663         // FIXME: Should really be just isOperationLegalOrCustom.
18664         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18665         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18666       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18667                          DAG.getVectorIdxConstant(OrigElt, DL));
18668     }
18669   }
18670 
18671   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18672   // simplify it based on the (valid) extraction indices.
18673   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18674         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18675                Use->getOperand(0) == VecOp &&
18676                isa<ConstantSDNode>(Use->getOperand(1));
18677       })) {
18678     APInt DemandedElts = APInt::getNullValue(NumElts);
18679     for (SDNode *Use : VecOp->uses()) {
18680       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18681       if (CstElt->getAPIntValue().ult(NumElts))
18682         DemandedElts.setBit(CstElt->getZExtValue());
18683     }
18684     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18685       // We simplified the vector operand of this extract element. If this
18686       // extract is not dead, visit it again so it is folded properly.
18687       if (N->getOpcode() != ISD::DELETED_NODE)
18688         AddToWorklist(N);
18689       return SDValue(N, 0);
18690     }
18691     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18692     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18693       // We simplified the vector operand of this extract element. If this
18694       // extract is not dead, visit it again so it is folded properly.
18695       if (N->getOpcode() != ISD::DELETED_NODE)
18696         AddToWorklist(N);
18697       return SDValue(N, 0);
18698     }
18699   }
18700 
18701   // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
18704   bool BCNumEltsChanged = false;
18705   EVT ExtVT = VecVT.getVectorElementType();
18706   EVT LVT = ExtVT;
18707   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18708     return SDValue();
18709 
18710   if (VecOp.getOpcode() == ISD::BITCAST) {
18711     // Don't duplicate a load with other uses.
18712     if (!VecOp.hasOneUse())
18713       return SDValue();
18714 
18715     EVT BCVT = VecOp.getOperand(0).getValueType();
18716     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18717       return SDValue();
18718     if (NumElts != BCVT.getVectorNumElements())
18719       BCNumEltsChanged = true;
18720     VecOp = VecOp.getOperand(0);
18721     ExtVT = BCVT.getVectorElementType();
18722   }
18723 
18724   // extract (vector load $addr), i --> load $addr + i * size
18725   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18726       ISD::isNormalLoad(VecOp.getNode()) &&
18727       !Index->hasPredecessor(VecOp.getNode())) {
18728     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18729     if (VecLoad && VecLoad->isSimple())
18730       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18731   }
18732 
18733   // Perform only after legalization to ensure build_vector / vector_shuffle
18734   // optimizations have already been done.
18735   if (!LegalOperations || !IndexC)
18736     return SDValue();
18737 
18738   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18739   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18740   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18741   int Elt = IndexC->getZExtValue();
18742   LoadSDNode *LN0 = nullptr;
18743   if (ISD::isNormalLoad(VecOp.getNode())) {
18744     LN0 = cast<LoadSDNode>(VecOp);
18745   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18746              VecOp.getOperand(0).getValueType() == ExtVT &&
18747              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18748     // Don't duplicate a load with other uses.
18749     if (!VecOp.hasOneUse())
18750       return SDValue();
18751 
18752     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18753   }
18754   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18755     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18756     // =>
18757     // (load $addr+1*size)
18758 
18759     // Don't duplicate a load with other uses.
18760     if (!VecOp.hasOneUse())
18761       return SDValue();
18762 
18763     // If the bit convert changed the number of elements, it is unsafe
18764     // to examine the mask.
18765     if (BCNumEltsChanged)
18766       return SDValue();
18767 
    // Select the input vector, guarding against an out-of-range extract index.
18769     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18770     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18771 
18772     if (VecOp.getOpcode() == ISD::BITCAST) {
18773       // Don't duplicate a load with other uses.
18774       if (!VecOp.hasOneUse())
18775         return SDValue();
18776 
18777       VecOp = VecOp.getOperand(0);
18778     }
18779     if (ISD::isNormalLoad(VecOp.getNode())) {
18780       LN0 = cast<LoadSDNode>(VecOp);
18781       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18782       Index = DAG.getConstant(Elt, DL, Index.getValueType());
18783     }
18784   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18785              VecVT.getVectorElementType() == ScalarVT &&
18786              (!LegalTypes ||
18787               TLI.isTypeLegal(
18788                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18789     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18790     //      -> extract_vector_elt a, 0
18791     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18792     //      -> extract_vector_elt a, 1
18793     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18794     //      -> extract_vector_elt b, 0
18795     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18796     //      -> extract_vector_elt b, 1
18797     SDLoc SL(N);
18798     EVT ConcatVT = VecOp.getOperand(0).getValueType();
18799     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18800     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18801                                      Index.getValueType());
18802 
18803     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18804     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18805                               ConcatVT.getVectorElementType(),
18806                               ConcatOp, NewIdx);
18807     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18808   }
18809 
  // Make sure we found a simple (non-volatile, non-atomic) load and the
  // extractelement is its only use.
18812   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
18813     return SDValue();
18814 
18815   // If Idx was -1 above, Elt is going to be -1, so just return undef.
18816   if (Elt == -1)
18817     return DAG.getUNDEF(LVT);
18818 
18819   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18820 }
18821 
// Simplify (build_vec (ext X), (ext Y), ...) to (bitcast (build_vec X, Y, ...))
18823 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18824   // We perform this optimization post type-legalization because
18825   // the type-legalizer often scalarizes integer-promoted vectors.
18826   // Performing this optimization before may create bit-casts which
18827   // will be type-legalized to complex code sequences.
18828   // We perform this optimization only before the operation legalizer because we
18829   // may introduce illegal operations.
18830   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18831     return SDValue();
18832 
18833   unsigned NumInScalars = N->getNumOperands();
18834   SDLoc DL(N);
18835   EVT VT = N->getValueType(0);
18836 
18837   // Check to see if this is a BUILD_VECTOR of a bunch of values
18838   // which come from any_extend or zero_extend nodes. If so, we can create
18839   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18840   // optimizations. We do not handle sign-extend because we can't fill the sign
18841   // using shuffles.
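  // For example, on a little-endian target:
  //   (v4i32 build_vector (zext i16:a), (zext i16:b),
  //                       (zext i16:c), (zext i16:d))
  // may become (bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0)),
  // where the zeros are the Filler chosen below (undef if all inputs
  // were any_extend).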
18842   EVT SourceType = MVT::Other;
18843   bool AllAnyExt = true;
18844 
18845   for (unsigned i = 0; i != NumInScalars; ++i) {
18846     SDValue In = N->getOperand(i);
18847     // Ignore undef inputs.
18848     if (In.isUndef()) continue;
18849 
18850     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
18851     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18852 
18853     // Abort if the element is not an extension.
18854     if (!ZeroExt && !AnyExt) {
18855       SourceType = MVT::Other;
18856       break;
18857     }
18858 
18859     // The input is a ZeroExt or AnyExt. Check the original type.
18860     EVT InTy = In.getOperand(0).getValueType();
18861 
18862     // Check that all of the widened source types are the same.
18863     if (SourceType == MVT::Other)
18864       // First time.
18865       SourceType = InTy;
18866     else if (InTy != SourceType) {
      // Multiple input types. Abort.
18868       SourceType = MVT::Other;
18869       break;
18870     }
18871 
18872     // Check if all of the extends are ANY_EXTENDs.
18873     AllAnyExt &= AnyExt;
18874   }
18875 
18876   // In order to have valid types, all of the inputs must be extended from the
18877   // same source type and all of the inputs must be any or zero extend.
18878   // Scalar sizes must be a power of two.
18879   EVT OutScalarTy = VT.getScalarType();
18880   bool ValidTypes = SourceType != MVT::Other &&
18881                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
18882                  isPowerOf2_32(SourceType.getSizeInBits());
18883 
18884   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18885   // turn into a single shuffle instruction.
18886   if (!ValidTypes)
18887     return SDValue();
18888 
18889   // If we already have a splat buildvector, then don't fold it if it means
18890   // introducing zeros.
18891   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18892     return SDValue();
18893 
18894   bool isLE = DAG.getDataLayout().isLittleEndian();
18895   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18896   assert(ElemRatio > 1 && "Invalid element size ratio");
18897   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18898                                DAG.getConstant(0, DL, SourceType);
18899 
18900   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18901   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18902 
18903   // Populate the new build_vector
18904   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18905     SDValue Cast = N->getOperand(i);
18906     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18907             Cast.getOpcode() == ISD::ZERO_EXTEND ||
18908             Cast.isUndef()) && "Invalid cast opcode");
18909     SDValue In;
18910     if (Cast.isUndef())
18911       In = DAG.getUNDEF(SourceType);
18912     else
18913       In = Cast->getOperand(0);
18914     unsigned Index = isLE ? (i * ElemRatio) :
18915                             (i * ElemRatio + (ElemRatio - 1));
18916 
18917     assert(Index < Ops.size() && "Invalid index");
18918     Ops[Index] = In;
18919   }
18920 
18921   // The type of the new BUILD_VECTOR node.
18922   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18923   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18924          "Invalid vector size");
18925   // Check if the new vector type is legal.
18926   if (!isTypeLegal(VecVT) ||
18927       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18928        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18929     return SDValue();
18930 
18931   // Make the new BUILD_VECTOR.
18932   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18933 
18934   // The new BUILD_VECTOR node has the potential to be further optimized.
18935   AddToWorklist(BV.getNode());
18936   // Bitcast to the desired type.
18937   return DAG.getBitcast(VT, BV);
18938 }
18939 
18940 // Simplify (build_vec (trunc $1)
18941 //                     (trunc (srl $1 half-width))
18942 //                     (trunc (srl $1 (2 * half-width))) …)
18943 // to (bitcast $1)
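// For example, on a little-endian target:
//   (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                       (trunc (srl x, 32)), (trunc (srl x, 48)))
// becomes (v4i16 (bitcast x)).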
18944 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18945   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18946 
  // Only for little-endian targets.
18948   if (!DAG.getDataLayout().isLittleEndian())
18949     return SDValue();
18950 
18951   SDLoc DL(N);
18952   EVT VT = N->getValueType(0);
18953   EVT OutScalarTy = VT.getScalarType();
18954   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18955 
  // Only handle power-of-two scalar types so the bitcast is guaranteed valid.
18957   if (!isPowerOf2_64(ScalarTypeBitsize))
18958     return SDValue();
18959 
18960   unsigned NumInScalars = N->getNumOperands();
18961 
18962   // Look through bitcasts
18963   auto PeekThroughBitcast = [](SDValue Op) {
18964     if (Op.getOpcode() == ISD::BITCAST)
18965       return Op.getOperand(0);
18966     return Op;
18967   };
18968 
18969   // The source value where all the parts are extracted.
18970   SDValue Src;
18971   for (unsigned i = 0; i != NumInScalars; ++i) {
18972     SDValue In = PeekThroughBitcast(N->getOperand(i));
18973     // Ignore undef inputs.
18974     if (In.isUndef()) continue;
18975 
18976     if (In.getOpcode() != ISD::TRUNCATE)
18977       return SDValue();
18978 
18979     In = PeekThroughBitcast(In.getOperand(0));
18980 
18981     if (In.getOpcode() != ISD::SRL) {
      // For now we only handle build_vec without shuffling; handle shifts
      // here in the future.
18984       if (i != 0)
18985         return SDValue();
18986 
18987       Src = In;
18988     } else {
18989       // In is SRL
18990       SDValue part = PeekThroughBitcast(In.getOperand(0));
18991 
18992       if (!Src) {
18993         Src = part;
18994       } else if (Src != part) {
18995         // Vector parts do not stem from the same variable
18996         return SDValue();
18997       }
18998 
18999       SDValue ShiftAmtVal = In.getOperand(1);
19000       if (!isa<ConstantSDNode>(ShiftAmtVal))
19001         return SDValue();
19002 
19003       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
19004 
      // The value is not extracted from the position matching its lane.
19006       if (ShiftAmt != i * ScalarTypeBitsize)
19007         return SDValue();
19008     }
19009   }
19010 
19011   // Only cast if the size is the same
19012   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
19013     return SDValue();
19014 
19015   return DAG.getBitcast(VT, Src);
19016 }
19017 
19018 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
19019                                            ArrayRef<int> VectorMask,
19020                                            SDValue VecIn1, SDValue VecIn2,
19021                                            unsigned LeftIdx, bool DidSplitVec) {
19022   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
19023 
19024   EVT VT = N->getValueType(0);
19025   EVT InVT1 = VecIn1.getValueType();
19026   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
19027 
19028   unsigned NumElems = VT.getVectorNumElements();
19029   unsigned ShuffleNumElems = NumElems;
19030 
19031   // If we artificially split a vector in two already, then the offsets in the
19032   // operands will all be based off of VecIn1, even those in VecIn2.
19033   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
19034 
19035   uint64_t VTSize = VT.getFixedSizeInBits();
19036   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
19037   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
19038 
19039   // We can't generate a shuffle node with mismatched input and output types.
19040   // Try to make the types match the type of the output.
19041   if (InVT1 != VT || InVT2 != VT) {
19042     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
19043       // If the output vector length is a multiple of both input lengths,
19044       // we can concatenate them and pad the rest with undefs.
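      // For example, two v2f32 inputs feeding a v8f32 output become
      //   (concat_vectors VecIn1, VecIn2, undef, undef).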
19045       unsigned NumConcats = VTSize / InVT1Size;
19046       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
19047       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
19048       ConcatOps[0] = VecIn1;
19049       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
19050       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19051       VecIn2 = SDValue();
19052     } else if (InVT1Size == VTSize * 2) {
19053       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
19054         return SDValue();
19055 
19056       if (!VecIn2.getNode()) {
19057         // If we only have one input vector, and it's twice the size of the
19058         // output, split it in two.
19059         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
19060                              DAG.getVectorIdxConstant(NumElems, DL));
19061         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
19062         // Since we now have shorter input vectors, adjust the offset of the
19063         // second vector's start.
19064         Vec2Offset = NumElems;
19065       } else if (InVT2Size <= InVT1Size) {
19066         // VecIn1 is wider than the output, and we have another, possibly
19067         // smaller input. Pad the smaller input with undefs, shuffle at the
19068         // input vector width, and extract the output.
19069         // The shuffle type is different than VT, so check legality again.
19070         if (LegalOperations &&
19071             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
19072           return SDValue();
19073 
19074         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
19075         // lower it back into a BUILD_VECTOR. So if the inserted type is
19076         // illegal, don't even try.
19077         if (InVT1 != InVT2) {
19078           if (!TLI.isTypeLegal(InVT2))
19079             return SDValue();
19080           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
19081                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
19082         }
19083         ShuffleNumElems = NumElems * 2;
19084       } else {
19085         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
19086         // than VecIn1. We can't handle this for now - this case will disappear
19087         // when we start sorting the vectors by type.
19088         return SDValue();
19089       }
19090     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
19091       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
19092       ConcatOps[0] = VecIn2;
19093       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19094     } else {
19095       // TODO: Support cases where the length mismatch isn't exactly by a
19096       // factor of 2.
19097       // TODO: Move this check upwards, so that if we have bad type
19098       // mismatches, we don't create any DAG nodes.
19099       return SDValue();
19100     }
19101   }
19102 
19103   // Initialize mask to undef.
19104   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
19105 
19106   // Only need to run up to the number of elements actually used, not the
19107   // total number of elements in the shuffle - if we are shuffling a wider
19108   // vector, the high lanes should be set to undef.
19109   for (unsigned i = 0; i != NumElems; ++i) {
19110     if (VectorMask[i] <= 0)
19111       continue;
19112 
19113     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
19114     if (VectorMask[i] == (int)LeftIdx) {
19115       Mask[i] = ExtIndex;
19116     } else if (VectorMask[i] == (int)LeftIdx + 1) {
19117       Mask[i] = Vec2Offset + ExtIndex;
19118     }
19119   }
19120 
  // The types of the input vectors may have changed above.
19122   InVT1 = VecIn1.getValueType();
19123 
19124   // If we already have a VecIn2, it should have the same type as VecIn1.
19125   // If we don't, get an undef/zero vector of the appropriate type.
19126   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
19127   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
19128 
19129   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
19130   if (ShuffleNumElems > NumElems)
19131     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
19132 
19133   return Shuffle;
19134 }
19135 
19136 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
19137   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
19138 
19139   // First, determine where the build vector is not undef.
19140   // TODO: We could extend this to handle zero elements as well as undefs.
19141   int NumBVOps = BV->getNumOperands();
19142   int ZextElt = -1;
19143   for (int i = 0; i != NumBVOps; ++i) {
19144     SDValue Op = BV->getOperand(i);
19145     if (Op.isUndef())
19146       continue;
19147     if (ZextElt == -1)
19148       ZextElt = i;
19149     else
19150       return SDValue();
19151   }
19152   // Bail out if there's no non-undef element.
19153   if (ZextElt == -1)
19154     return SDValue();
19155 
19156   // The build vector contains some number of undef elements and exactly
19157   // one other element. That other element must be a zero-extended scalar
19158   // extracted from a vector at a constant index to turn this into a shuffle.
19159   // Also, require that the build vector does not implicitly truncate/extend
19160   // its elements.
19161   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
19162   EVT VT = BV->getValueType(0);
19163   SDValue Zext = BV->getOperand(ZextElt);
19164   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
19165       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19166       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
19167       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
19168     return SDValue();
19169 
  // The zero-extend destination size must be a multiple of the source size,
  // and we must be building a vector of the same size as the source of the
  // extract element.
19172   SDValue Extract = Zext.getOperand(0);
19173   unsigned DestSize = Zext.getValueSizeInBits();
19174   unsigned SrcSize = Extract.getValueSizeInBits();
19175   if (DestSize % SrcSize != 0 ||
19176       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
19177     return SDValue();
19178 
19179   // Create a shuffle mask that will combine the extracted element with zeros
19180   // and undefs.
19181   int ZextRatio = DestSize / SrcSize;
19182   int NumMaskElts = NumBVOps * ZextRatio;
19183   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
19184   for (int i = 0; i != NumMaskElts; ++i) {
19185     if (i / ZextRatio == ZextElt) {
19186       // The low bits of the (potentially translated) extracted element map to
19187       // the source vector. The high bits map to zero. We will use a zero vector
19188       // as the 2nd source operand of the shuffle, so use the 1st element of
19189       // that vector (mask value is number-of-elements) for the high bits.
19190       if (i % ZextRatio == 0)
19191         ShufMask[i] = Extract.getConstantOperandVal(1);
19192       else
19193         ShufMask[i] = NumMaskElts;
19194     }
19195 
19196     // Undef elements of the build vector remain undef because we initialize
19197     // the shuffle mask with -1.
19198   }
19199 
19200   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
19201   // bitcast (shuffle V, ZeroVec, VectorMask)
19202   SDLoc DL(BV);
19203   EVT VecVT = Extract.getOperand(0).getValueType();
19204   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
19205   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19206   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
19207                                              ZeroVec, ShufMask, DAG);
19208   if (!Shuf)
19209     return SDValue();
19210   return DAG.getBitcast(VT, Shuf);
19211 }
19212 
19213 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
19214 // operations. If the types of the vectors we're extracting from allow it,
19215 // turn this into a vector_shuffle node.
19216 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
19217   SDLoc DL(N);
19218   EVT VT = N->getValueType(0);
19219 
19220   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
19221   if (!isTypeLegal(VT))
19222     return SDValue();
19223 
19224   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
19225     return V;
19226 
19227   // May only combine to shuffle after legalize if shuffle is legal.
19228   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
19229     return SDValue();
19230 
19231   bool UsesZeroVector = false;
19232   unsigned NumElems = N->getNumOperands();
19233 
19234   // Record, for each element of the newly built vector, which input vector
19235   // that element comes from. -1 stands for undef, 0 for the zero vector,
19236   // and positive values for the input vectors.
19237   // VectorMask maps each element to its vector number, and VecIn maps vector
19238   // numbers to their initial SDValues.
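  // E.g. for (build_vector (extractelt A, 0), 0, undef, (extractelt B, 1)),
  // where A and B stand for arbitrary source vectors, we would end up with
  // VectorMask = [1, 0, -1, 2] and VecIn = [null, A, B].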
19239 
19240   SmallVector<int, 8> VectorMask(NumElems, -1);
19241   SmallVector<SDValue, 8> VecIn;
19242   VecIn.push_back(SDValue());
19243 
19244   for (unsigned i = 0; i != NumElems; ++i) {
19245     SDValue Op = N->getOperand(i);
19246 
19247     if (Op.isUndef())
19248       continue;
19249 
19250     // See if we can use a blend with a zero vector.
19251     // TODO: Should we generalize this to a blend with an arbitrary constant
19252     // vector?
19253     if (isNullConstant(Op) || isNullFPConstant(Op)) {
19254       UsesZeroVector = true;
19255       VectorMask[i] = 0;
19256       continue;
19257     }
19258 
19259     // Not an undef or zero. If the input is something other than an
19260     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
19261     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
19262         !isa<ConstantSDNode>(Op.getOperand(1)))
19263       return SDValue();
19264     SDValue ExtractedFromVec = Op.getOperand(0);
19265 
19266     if (ExtractedFromVec.getValueType().isScalableVector())
19267       return SDValue();
19268 
19269     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
19270     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
19271       return SDValue();
19272 
19273     // All inputs must have the same element type as the output.
19274     if (VT.getVectorElementType() !=
19275         ExtractedFromVec.getValueType().getVectorElementType())
19276       return SDValue();
19277 
19278     // Have we seen this input vector before?
    // The list of source vectors is expected to be tiny (usually 1 or 2
    // elements), so using a map back from SDValues to numbers isn't worth it.
19281     unsigned Idx = std::distance(VecIn.begin(), find(VecIn, ExtractedFromVec));
19282     if (Idx == VecIn.size())
19283       VecIn.push_back(ExtractedFromVec);
19284 
19285     VectorMask[i] = Idx;
19286   }
19287 
19288   // If we didn't find at least one input vector, bail out.
19289   if (VecIn.size() < 2)
19290     return SDValue();
19291 
  // If all the operands of the BUILD_VECTOR extract from the same vector,
  // then split that vector efficiently based on the maximum vector access
  // index and adjust VectorMask and VecIn accordingly.
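  // E.g. (hypothetical types): a two-element build vector reading elements 0
  // and 20 of a v64i32 input has MaxIndex = 20 and NearestPow2 = 32, so if
  // v16i32 is legal we split the input into the v16i32 halves holding
  // elements [0,16) and [16,32), and remap each VectorMask entry to 1 or 2
  // depending on which half its index falls into.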
19296   bool DidSplitVec = false;
19297   if (VecIn.size() == 2) {
19298     unsigned MaxIndex = 0;
19299     unsigned NearestPow2 = 0;
19300     SDValue Vec = VecIn.back();
19301     EVT InVT = Vec.getValueType();
19302     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
19303 
19304     for (unsigned i = 0; i < NumElems; i++) {
19305       if (VectorMask[i] <= 0)
19306         continue;
19307       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
19308       IndexVec[i] = Index;
19309       MaxIndex = std::max(MaxIndex, Index);
19310     }
19311 
19312     NearestPow2 = PowerOf2Ceil(MaxIndex);
19313     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
19314         NumElems * 2 < NearestPow2) {
19315       unsigned SplitSize = NearestPow2 / 2;
19316       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
19317                                      InVT.getVectorElementType(), SplitSize);
19318       if (TLI.isTypeLegal(SplitVT)) {
19319         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19320                                      DAG.getVectorIdxConstant(SplitSize, DL));
19321         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
19322                                      DAG.getVectorIdxConstant(0, DL));
19323         VecIn.pop_back();
19324         VecIn.push_back(VecIn1);
19325         VecIn.push_back(VecIn2);
19326         DidSplitVec = true;
19327 
19328         for (unsigned i = 0; i < NumElems; i++) {
19329           if (VectorMask[i] <= 0)
19330             continue;
19331           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19332         }
19333       }
19334     }
19335   }
19336 
19337   // TODO: We want to sort the vectors by descending length, so that adjacent
19338   // pairs have similar length, and the longer vector is always first in the
19339   // pair.
19340 
  // TODO: Should this fire if some of the input vectors have illegal types
  // (like it does now), or should we let legalization run its course first?
19343 
19344   // Shuffle phase:
19345   // Take pairs of vectors, and shuffle them so that the result has elements
19346   // from these vectors in the correct places.
19347   // For example, given:
19348   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19349   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19350   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19351   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19352   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19353   // We will generate:
19354   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19355   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19356   SmallVector<SDValue, 4> Shuffles;
19357   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19358     unsigned LeftIdx = 2 * In + 1;
19359     SDValue VecLeft = VecIn[LeftIdx];
19360     SDValue VecRight =
19361         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19362 
19363     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19364                                                 VecRight, LeftIdx, DidSplitVec))
19365       Shuffles.push_back(Shuffle);
19366     else
19367       return SDValue();
19368   }
19369 
19370   // If we need the zero vector as an "ingredient" in the blend tree, add it
19371   // to the list of shuffles.
19372   if (UsesZeroVector)
19373     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19374                                       : DAG.getConstantFP(0.0, DL, VT));
19375 
19376   // If we only have one shuffle, we're done.
19377   if (Shuffles.size() == 1)
19378     return Shuffles[0];
19379 
19380   // Update the vector mask to point to the post-shuffle vectors.
19381   for (int &Vec : VectorMask)
19382     if (Vec == 0)
19383       Vec = Shuffles.size() - 1;
19384     else
19385       Vec = (Vec - 1) / 2;
19386 
19387   // More than one shuffle. Generate a binary tree of blends, e.g. if from
19388   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19389   // generate:
19390   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19391   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19392   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19393   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19394   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19395   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19396   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19397 
19398   // Make sure the initial size of the shuffle list is even.
19399   if (Shuffles.size() % 2)
19400     Shuffles.push_back(DAG.getUNDEF(VT));
19401 
19402   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19403     if (CurSize % 2) {
19404       Shuffles[CurSize] = DAG.getUNDEF(VT);
19405       CurSize++;
19406     }
19407     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19408       int Left = 2 * In;
19409       int Right = 2 * In + 1;
19410       SmallVector<int, 8> Mask(NumElems, -1);
19411       for (unsigned i = 0; i != NumElems; ++i) {
19412         if (VectorMask[i] == Left) {
19413           Mask[i] = i;
19414           VectorMask[i] = In;
19415         } else if (VectorMask[i] == Right) {
19416           Mask[i] = i + NumElems;
19417           VectorMask[i] = In;
19418         }
19419       }
19420 
19421       Shuffles[In] =
19422           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19423     }
19424   }
19425   return Shuffles[0];
19426 }
19427 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
19430 // TODO: Support sign extend?
19431 // TODO: Allow undef elements?
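// E.g. (with illustrative types):
//   (v4i32 build_vector (zext (extractelt v8i16 X, 4)), ...,
//                       (zext (extractelt v8i16 X, 7)))
//     --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))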
19432 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19433   if (LegalOperations)
19434     return SDValue();
19435 
19436   EVT VT = N->getValueType(0);
19437 
19438   bool FoundZeroExtend = false;
19439   SDValue Op0 = N->getOperand(0);
19440   auto checkElem = [&](SDValue Op) -> int64_t {
19441     unsigned Opc = Op.getOpcode();
19442     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19443     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19444         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19445         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19446       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19447         return C->getZExtValue();
19448     return -1;
19449   };
19450 
19451   // Make sure the first element matches
19452   // (zext (extract_vector_elt X, C))
19453   int64_t Offset = checkElem(Op0);
19454   if (Offset < 0)
19455     return SDValue();
19456 
19457   unsigned NumElems = N->getNumOperands();
19458   SDValue In = Op0.getOperand(0).getOperand(0);
19459   EVT InSVT = In.getValueType().getScalarType();
19460   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19461 
19462   // Don't create an illegal input type after type legalization.
19463   if (LegalTypes && !TLI.isTypeLegal(InVT))
19464     return SDValue();
19465 
19466   // Ensure all the elements come from the same vector and are adjacent.
19467   for (unsigned i = 1; i != NumElems; ++i) {
19468     if ((Offset + i) != checkElem(N->getOperand(i)))
19469       return SDValue();
19470   }
19471 
19472   SDLoc DL(N);
19473   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19474                    Op0.getOperand(0).getOperand(1));
19475   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19476                      VT, In);
19477 }
19478 
19479 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19480   EVT VT = N->getValueType(0);
19481 
19482   // A vector built entirely of undefs is undef.
19483   if (ISD::allOperandsUndef(N))
19484     return DAG.getUNDEF(VT);
19485 
19486   // If this is a splat of a bitcast from another vector, change to a
19487   // concat_vector.
19488   // For example:
19489   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19490   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19491   //
19492   // If X is a build_vector itself, the concat can become a larger build_vector.
19493   // TODO: Maybe this is useful for non-splat too?
19494   if (!LegalOperations) {
19495     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19496       Splat = peekThroughBitcasts(Splat);
19497       EVT SrcVT = Splat.getValueType();
19498       if (SrcVT.isVector()) {
19499         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19500         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19501                                      SrcVT.getVectorElementType(), NumElts);
19502         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19503           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19504           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19505                                        NewVT, Ops);
19506           return DAG.getBitcast(VT, Concat);
19507         }
19508       }
19509     }
19510   }
19511 
19512   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19513   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19514     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19515       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19516       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19517     }
19518 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
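  // E.g. (X is an arbitrary wider vector):
  //   (v2i64 build_vector (extractelt v4i64 X, 2), (extractelt v4i64 X, 3))
  //     --> (v2i64 extract_subvector X, 2)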
19520   if (!LegalTypes && (N->getNumOperands() > 1)) {
19521     SDValue Op0 = N->getOperand(0);
19522     auto checkElem = [&](SDValue Op) -> uint64_t {
19523       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19524           (Op0.getOperand(0) == Op.getOperand(0)))
19525         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19526           return CNode->getZExtValue();
19527       return -1;
19528     };
19529 
19530     int Offset = checkElem(Op0);
19531     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19532       if (Offset + i != checkElem(N->getOperand(i))) {
19533         Offset = -1;
19534         break;
19535       }
19536     }
19537 
19538     if ((Offset == 0) &&
19539         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19540       return Op0.getOperand(0);
19541     if ((Offset != -1) &&
19542         ((Offset % N->getValueType(0).getVectorNumElements()) ==
19543          0)) // IDX must be multiple of output size.
19544       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19545                          Op0.getOperand(0), Op0.getOperand(1));
19546   }
19547 
19548   if (SDValue V = convertBuildVecZextToZext(N))
19549     return V;
19550 
19551   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19552     return V;
19553 
19554   if (SDValue V = reduceBuildVecTruncToBitCast(N))
19555     return V;
19556 
19557   if (SDValue V = reduceBuildVecToShuffle(N))
19558     return V;
19559 
19560   return SDValue();
19561 }
19562 
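// Fold a CONCAT_VECTORS whose operands are all bitcasts from scalars (or
// undef) into one wide BUILD_VECTOR of those scalars. E.g. (illustrative,
// assuming v2i32 is not a legal type here, since legal operand vectors are
// left alone):
//   (v4i32 concat_vectors (v2i32 bitcast (i64 A)), (v2i32 bitcast (i64 B)))
//     --> (v4i32 bitcast (v2i64 build_vector A, B))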
19563 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19564   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19565   EVT OpVT = N->getOperand(0).getValueType();
19566 
19567   // If the operands are legal vectors, leave them alone.
19568   if (TLI.isTypeLegal(OpVT))
19569     return SDValue();
19570 
19571   SDLoc DL(N);
19572   EVT VT = N->getValueType(0);
19573   SmallVector<SDValue, 8> Ops;
19574 
19575   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19576   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19577 
19578   // Keep track of what we encounter.
19579   bool AnyInteger = false;
19580   bool AnyFP = false;
19581   for (const SDValue &Op : N->ops()) {
19582     if (ISD::BITCAST == Op.getOpcode() &&
19583         !Op.getOperand(0).getValueType().isVector())
19584       Ops.push_back(Op.getOperand(0));
19585     else if (ISD::UNDEF == Op.getOpcode())
19586       Ops.push_back(ScalarUndef);
19587     else
19588       return SDValue();
19589 
    // Note whether we encounter an integer or floating point scalar.
    // If it's neither, bail out; it could be something weird like x86mmx.
19592     EVT LastOpVT = Ops.back().getValueType();
19593     if (LastOpVT.isFloatingPoint())
19594       AnyFP = true;
19595     else if (LastOpVT.isInteger())
19596       AnyInteger = true;
19597     else
19598       return SDValue();
19599   }
19600 
19601   // If any of the operands is a floating point scalar bitcast to a vector,
19602   // use floating point types throughout, and bitcast everything.
19603   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19604   if (AnyFP) {
19605     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19606     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19607     if (AnyInteger) {
19608       for (SDValue &Op : Ops) {
19609         if (Op.getValueType() == SVT)
19610           continue;
19611         if (Op.isUndef())
19612           Op = ScalarUndef;
19613         else
19614           Op = DAG.getBitcast(SVT, Op);
19615       }
19616     }
19617   }
19618 
19619   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19620                                VT.getSizeInBits() / SVT.getSizeInBits());
19621   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19622 }
19623 
19624 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19625 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19626 // most two distinct vectors the same size as the result, attempt to turn this
19627 // into a legal shuffle.
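// E.g. (V1 and V2 are arbitrary v4i32 vectors):
//   (v4i32 concat_vectors (v2i32 extract_subvector V1, 2),
//                         (v2i32 extract_subvector V2, 0))
//     --> (v4i32 vector_shuffle<2,3,4,5> V1, V2)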
19628 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19629   EVT VT = N->getValueType(0);
19630   EVT OpVT = N->getOperand(0).getValueType();
19631 
19632   // We currently can't generate an appropriate shuffle for a scalable vector.
19633   if (VT.isScalableVector())
19634     return SDValue();
19635 
19636   int NumElts = VT.getVectorNumElements();
19637   int NumOpElts = OpVT.getVectorNumElements();
19638 
19639   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19640   SmallVector<int, 8> Mask;
19641 
19642   for (SDValue Op : N->ops()) {
19643     Op = peekThroughBitcasts(Op);
19644 
19645     // UNDEF nodes convert to UNDEF shuffle mask values.
19646     if (Op.isUndef()) {
19647       Mask.append((unsigned)NumOpElts, -1);
19648       continue;
19649     }
19650 
19651     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19652       return SDValue();
19653 
19654     // What vector are we extracting the subvector from and at what index?
19655     SDValue ExtVec = Op.getOperand(0);
19656     int ExtIdx = Op.getConstantOperandVal(1);
19657 
19658     // We want the EVT of the original extraction to correctly scale the
19659     // extraction index.
19660     EVT ExtVT = ExtVec.getValueType();
19661     ExtVec = peekThroughBitcasts(ExtVec);
19662 
19663     // UNDEF nodes convert to UNDEF shuffle mask values.
19664     if (ExtVec.isUndef()) {
19665       Mask.append((unsigned)NumOpElts, -1);
19666       continue;
19667     }
19668 
19669     // Ensure that we are extracting a subvector from a vector the same
19670     // size as the result.
19671     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19672       return SDValue();
19673 
19674     // Scale the subvector index to account for any bitcast.
19675     int NumExtElts = ExtVT.getVectorNumElements();
19676     if (0 == (NumExtElts % NumElts))
19677       ExtIdx /= (NumExtElts / NumElts);
19678     else if (0 == (NumElts % NumExtElts))
19679       ExtIdx *= (NumElts / NumExtElts);
19680     else
19681       return SDValue();
19682 
19683     // At most we can reference 2 inputs in the final shuffle.
19684     if (SV0.isUndef() || SV0 == ExtVec) {
19685       SV0 = ExtVec;
19686       for (int i = 0; i != NumOpElts; ++i)
19687         Mask.push_back(i + ExtIdx);
19688     } else if (SV1.isUndef() || SV1 == ExtVec) {
19689       SV1 = ExtVec;
19690       for (int i = 0; i != NumOpElts; ++i)
19691         Mask.push_back(i + ExtIdx + NumElts);
19692     } else {
19693       return SDValue();
19694     }
19695   }
19696 
19697   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19698   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19699                                      DAG.getBitcast(VT, SV1), Mask, DAG);
19700 }
19701 
19702 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19703   unsigned CastOpcode = N->getOperand(0).getOpcode();
19704   switch (CastOpcode) {
19705   case ISD::SINT_TO_FP:
19706   case ISD::UINT_TO_FP:
19707   case ISD::FP_TO_SINT:
19708   case ISD::FP_TO_UINT:
19709     // TODO: Allow more opcodes?
19710     //  case ISD::BITCAST:
19711     //  case ISD::TRUNCATE:
19712     //  case ISD::ZERO_EXTEND:
19713     //  case ISD::SIGN_EXTEND:
19714     //  case ISD::FP_EXTEND:
19715     break;
19716   default:
19717     return SDValue();
19718   }
19719 
19720   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19721   if (!SrcVT.isVector())
19722     return SDValue();
19723 
19724   // All operands of the concat must be the same kind of cast from the same
19725   // source type.
19726   SmallVector<SDValue, 4> SrcOps;
19727   for (SDValue Op : N->ops()) {
19728     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19729         Op.getOperand(0).getValueType() != SrcVT)
19730       return SDValue();
19731     SrcOps.push_back(Op.getOperand(0));
19732   }
19733 
  // The wider cast must be supported by the target. This is unusual because
  // the type used for the operation-legality query depends on the opcode. In
  // addition, check the other type in the cast to make sure this is really
  // legal.
19737   EVT VT = N->getValueType(0);
19738   EVT SrcEltVT = SrcVT.getVectorElementType();
19739   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19740   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19741   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19742   switch (CastOpcode) {
19743   case ISD::SINT_TO_FP:
19744   case ISD::UINT_TO_FP:
19745     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19746         !TLI.isTypeLegal(VT))
19747       return SDValue();
19748     break;
19749   case ISD::FP_TO_SINT:
19750   case ISD::FP_TO_UINT:
19751     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19752         !TLI.isTypeLegal(ConcatSrcVT))
19753       return SDValue();
19754     break;
19755   default:
19756     llvm_unreachable("Unexpected cast opcode");
19757   }
19758 
19759   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19760   SDLoc DL(N);
19761   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19762   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19763 }
19764 
19765 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19766   // If we only have one input vector, we don't need to do any concatenation.
19767   if (N->getNumOperands() == 1)
19768     return N->getOperand(0);
19769 
19770   // Check if all of the operands are undefs.
19771   EVT VT = N->getValueType(0);
19772   if (ISD::allOperandsUndef(N))
19773     return DAG.getUNDEF(VT);
19774 
19775   // Optimize concat_vectors where all but the first of the vectors are undef.
19776   if (all_of(drop_begin(N->ops()),
19777              [](const SDValue &Op) { return Op.isUndef(); })) {
19778     SDValue In = N->getOperand(0);
19779     assert(In.getValueType().isVector() && "Must concat vectors");
19780 
19781     // If the input is a concat_vectors, just make a larger concat by padding
19782     // with smaller undefs.
19783     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
19784       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
19785       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
19786       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
19787       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19788     }
19789 
19790     SDValue Scalar = peekThroughOneUseBitcasts(In);
19791 
19792     // concat_vectors(scalar_to_vector(scalar), undef) ->
19793     //     scalar_to_vector(scalar)
19794     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19795          Scalar.hasOneUse()) {
19796       EVT SVT = Scalar.getValueType().getVectorElementType();
19797       if (SVT == Scalar.getOperand(0).getValueType())
19798         Scalar = Scalar.getOperand(0);
19799     }
19800 
19801     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
19802     if (!Scalar.getValueType().isVector()) {
19803       // If the bitcast type isn't legal, it might be a trunc of a legal type;
19804       // look through the trunc so we can still do the transform:
19805       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
19806       if (Scalar->getOpcode() == ISD::TRUNCATE &&
19807           !TLI.isTypeLegal(Scalar.getValueType()) &&
19808           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
19809         Scalar = Scalar->getOperand(0);
19810 
19811       EVT SclTy = Scalar.getValueType();
19812 
19813       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
19814         return SDValue();
19815 
19816       // Bail out if the vector size is not a multiple of the scalar size.
19817       if (VT.getSizeInBits() % SclTy.getSizeInBits())
19818         return SDValue();
19819 
19820       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
19821       if (VNTNumElms < 2)
19822         return SDValue();
19823 
19824       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
19825       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
19826         return SDValue();
19827 
19828       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
19829       return DAG.getBitcast(VT, Res);
19830     }
19831   }
19832 
19833   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19834   // We have already tested above for an UNDEF only concatenation.
19835   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19836   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19837   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19838     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19839   };
19840   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19841     SmallVector<SDValue, 8> Opnds;
19842     EVT SVT = VT.getScalarType();
19843 
19844     EVT MinVT = SVT;
19845     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
19848       bool FoundMinVT = false;
19849       for (const SDValue &Op : N->ops())
19850         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19851           EVT OpSVT = Op.getOperand(0).getValueType();
19852           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19853           FoundMinVT = true;
19854         }
19855       assert(FoundMinVT && "Concat vector type mismatch");
19856     }
19857 
19858     for (const SDValue &Op : N->ops()) {
19859       EVT OpVT = Op.getValueType();
19860       unsigned NumElts = OpVT.getVectorNumElements();
19861 
19862       if (ISD::UNDEF == Op.getOpcode())
19863         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19864 
19865       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19866         if (SVT.isFloatingPoint()) {
19867           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19868           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19869         } else {
19870           for (unsigned i = 0; i != NumElts; ++i)
19871             Opnds.push_back(
19872                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19873         }
19874       }
19875     }
19876 
19877     assert(VT.getVectorNumElements() == Opnds.size() &&
19878            "Concat vector type mismatch");
19879     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19880   }
19881 
19882   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19883   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19884     return V;
19885 
19886   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19887   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19888     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19889       return V;
19890 
19891   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19892     return V;
19893 
  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTORS nodes. Scan the CONCAT_VECTORS
  // operands and look for CONCAT operations that place the incoming vectors
  // at the exact same location.
19898   //
19899   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
19900   SDValue SingleSource = SDValue();
19901   unsigned PartNumElem =
19902       N->getOperand(0).getValueType().getVectorMinNumElements();
19903 
19904   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19905     SDValue Op = N->getOperand(i);
19906 
19907     if (Op.isUndef())
19908       continue;
19909 
19910     // Check if this is the identity extract:
19911     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19912       return SDValue();
19913 
19914     // Find the single incoming vector for the extract_subvector.
19915     if (SingleSource.getNode()) {
19916       if (Op.getOperand(0) != SingleSource)
19917         return SDValue();
19918     } else {
19919       SingleSource = Op.getOperand(0);
19920 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
19924       if (SingleSource.getValueType() != N->getValueType(0))
19925         return SDValue();
19926     }
19927 
19928     // Check that we are reading from the identity index.
19929     unsigned IdentityIndex = i * PartNumElem;
19930     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19931       return SDValue();
19932   }
19933 
19934   if (SingleSource.getNode())
19935     return SingleSource;
19936 
19937   return SDValue();
19938 }
19939 
19940 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19941 // if the subvector can be sourced for free.
19942 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19943   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19944       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19945     return V.getOperand(1);
19946   }
19947   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19948   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19949       V.getOperand(0).getValueType() == SubVT &&
19950       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
19951     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
19952     return V.getOperand(SubIdx);
19953   }
19954   return SDValue();
19955 }
19956 
19957 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19958                                               SelectionDAG &DAG,
19959                                               bool LegalOperations) {
19960   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19961   SDValue BinOp = Extract->getOperand(0);
19962   unsigned BinOpcode = BinOp.getOpcode();
19963   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19964     return SDValue();
19965 
19966   EVT VecVT = BinOp.getValueType();
19967   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19968   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19969     return SDValue();
19970 
19971   SDValue Index = Extract->getOperand(1);
19972   EVT SubVT = Extract->getValueType(0);
19973   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
19974     return SDValue();
19975 
19976   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19977   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19978 
19979   // TODO: We could handle the case where only 1 operand is being inserted by
19980   //       creating an extract of the other operand, but that requires checking
19981   //       number of uses and/or costs.
19982   if (!Sub0 || !Sub1)
19983     return SDValue();
19984 
19985   // We are inserting both operands of the wide binop only to extract back
19986   // to the narrow vector size. Eliminate all of the insert/extract:
19987   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19988   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19989                      BinOp->getFlags());
19990 }
19991 
19992 /// If we are extracting a subvector produced by a wide binary operator try
19993 /// to use a narrow binary operator and/or avoid concatenation and extraction.
19994 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
19995                                           bool LegalOperations) {
19996   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19997   // some of these bailouts with other transforms.
19998 
19999   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
20000     return V;
20001 
20002   // The extract index must be a constant, so we can map it to a concat operand.
20003   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
20004   if (!ExtractIndexC)
20005     return SDValue();
20006 
20007   // We are looking for an optionally bitcasted wide vector binary operator
20008   // feeding an extract subvector.
20009   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20010   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
20011   unsigned BOpcode = BinOp.getOpcode();
20012   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
20013     return SDValue();
20014 
20015   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
20016   // reduced to the unary fneg when it is visited, and we probably want to deal
20017   // with fneg in a target-specific way.
20018   if (BOpcode == ISD::FSUB) {
20019     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
20020     if (C && C->getValueAPF().isNegZero())
20021       return SDValue();
20022   }
20023 
20024   // The binop must be a vector type, so we can extract some fraction of it.
20025   EVT WideBVT = BinOp.getValueType();
20026   // The optimisations below currently assume we are dealing with fixed length
20027   // vectors. It is possible to add support for scalable vectors, but at the
20028   // moment we've done no analysis to prove whether they are profitable or not.
20029   if (!WideBVT.isFixedLengthVector())
20030     return SDValue();
20031 
20032   EVT VT = Extract->getValueType(0);
20033   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
20034   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
20035          "Extract index is not a multiple of the vector length.");
20036 
20037   // Bail out if this is not a proper multiple width extraction.
20038   unsigned WideWidth = WideBVT.getSizeInBits();
20039   unsigned NarrowWidth = VT.getSizeInBits();
20040   if (WideWidth % NarrowWidth != 0)
20041     return SDValue();
20042 
20043   // Bail out if we are extracting a fraction of a single operation. This can
20044   // occur because we potentially looked through a bitcast of the binop.
20045   unsigned NarrowingRatio = WideWidth / NarrowWidth;
20046   unsigned WideNumElts = WideBVT.getVectorNumElements();
20047   if (WideNumElts % NarrowingRatio != 0)
20048     return SDValue();
20049 
20050   // Bail out if the target does not support a narrower version of the binop.
20051   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
20052                                    WideNumElts / NarrowingRatio);
20053   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
20054     return SDValue();
20055 
20056   // If extraction is cheap, we don't need to look at the binop operands
20057   // for concat ops. The narrow binop alone makes this transform profitable.
20058   // We can't just reuse the original extract index operand because we may have
20059   // bitcasted.
20060   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
20061   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
20062   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
20063       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
20064     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
20065     SDLoc DL(Extract);
20066     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20067     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20068                             BinOp.getOperand(0), NewExtIndex);
20069     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20070                             BinOp.getOperand(1), NewExtIndex);
20071     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
20072                                       BinOp.getNode()->getFlags());
20073     return DAG.getBitcast(VT, NarrowBinOp);
20074   }
20075 
20076   // Only handle the case where we are doubling and then halving. A larger ratio
20077   // may require more than two narrow binops to replace the wide binop.
20078   if (NarrowingRatio != 2)
20079     return SDValue();
20080 
20081   // TODO: The motivating case for this transform is an x86 AVX1 target. That
20082   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
20083   // flavors, but no other 256-bit integer support. This could be extended to
20084   // handle any binop, but that may require fixing/adding other folds to avoid
20085   // codegen regressions.
20086   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
20087     return SDValue();
20088 
20089   // We need at least one concatenation operation of a binop operand to make
20090   // this transform worthwhile. The concat must double the input vector sizes.
20091   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
20092     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
20093       return V.getOperand(ConcatOpNum);
20094     return SDValue();
20095   };
20096   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
20097   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
20098 
20099   if (SubVecL || SubVecR) {
20100     // If a binop operand was not the result of a concat, we must extract a
20101     // half-sized operand for our new narrow binop:
20102     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
20103     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
20104     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
20105     SDLoc DL(Extract);
20106     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
20107     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
20108                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20109                                       BinOp.getOperand(0), IndexC);
20110 
20111     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
20112                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
20113                                       BinOp.getOperand(1), IndexC);
20114 
20115     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
20116     return DAG.getBitcast(VT, NarrowBinOp);
20117   }
20118 
20119   return SDValue();
20120 }
20121 
20122 /// If we are extracting a subvector from a wide vector load, convert to a
20123 /// narrow load to eliminate the extraction:
20124 /// (extract_subvector (load wide vector)) --> (load narrow vector)
20125 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
20126   // TODO: Add support for big-endian. The offset calculation must be adjusted.
20127   if (DAG.getDataLayout().isBigEndian())
20128     return SDValue();
20129 
20130   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
20131   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
20134     return SDValue();
20135 
  EVT VT = Extract->getValueType(0);
20138 
20139   // We can only create byte sized loads.
20140   if (!VT.isByteSized())
20141     return SDValue();
20142 
20143   unsigned Index = ExtIdx->getZExtValue();
20144   unsigned NumElts = VT.getVectorMinNumElements();
20145 
20146   // The definition of EXTRACT_SUBVECTOR states that the index must be a
20147   // multiple of the minimum number of elements in the result type.
20148   assert(Index % NumElts == 0 && "The extract subvector index is not a "
20149                                  "multiple of the result's element count");
20150 
20151   // It's fine to use TypeSize here as we know the offset will not be negative.
20152   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
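  // E.g. (hypothetical): extracting the v2f64 at index 2 from a wide v8f64
  // load gives Offset = 16 bytes * (2 / 2) = 16 bytes from the base address.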
20153 
  // Allow targets to opt-out.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
20156     return SDValue();
20157 
20158   // The narrow load will be offset from the base address of the old load if
20159   // we are extracting from something besides index 0 (little-endian).
20160   SDLoc DL(Extract);
20161 
20162   // TODO: Use "BaseIndexOffset" to make this more effective.
20163   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
20164 
20165   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
20166   MachineFunction &MF = DAG.getMachineFunction();
20167   MachineMemOperand *MMO;
20168   if (Offset.isScalable()) {
20169     MachinePointerInfo MPI =
20170         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
20171     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
20172   } else
20173     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
20174                                   StoreSize);
20175 
20176   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
20177   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
20178   return NewLd;
20179 }
20180 
20181 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
20182   EVT NVT = N->getValueType(0);
20183   SDValue V = N->getOperand(0);
20184   uint64_t ExtIdx = N->getConstantOperandVal(1);
20185 
20186   // Extract from UNDEF is UNDEF.
20187   if (V.isUndef())
20188     return DAG.getUNDEF(NVT);
20189 
20190   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
20191     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
20192       return NarrowLoad;
20193 
20194   // Combine an extract of an extract into a single extract_subvector.
20195   // ext (ext X, C), 0 --> ext X, C
20196   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
20197     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
20198                                     V.getConstantOperandVal(1)) &&
20199         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
20200       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
20201                          V.getOperand(1));
20202     }
20203   }
20204 
20205   // Try to move vector bitcast after extract_subv by scaling extraction index:
20206   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
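  // E.g. (X is an arbitrary v8i32 vector):
  //   (v2i64 extract_subvector (v4i64 bitcast X), 2)
  //     --> (v2i64 bitcast (v4i32 extract_subvector X, 4))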
20207   if (V.getOpcode() == ISD::BITCAST &&
20208       V.getOperand(0).getValueType().isVector()) {
20209     SDValue SrcOp = V.getOperand(0);
20210     EVT SrcVT = SrcOp.getValueType();
20211     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
20212     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
20213     if ((SrcNumElts % DestNumElts) == 0) {
20214       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
20215       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
20216       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
20217                                       NewExtEC);
20218       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20219         SDLoc DL(N);
20220         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
20221         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20222                                          V.getOperand(0), NewIndex);
20223         return DAG.getBitcast(NVT, NewExtract);
20224       }
20225     }
20226     if ((DestNumElts % SrcNumElts) == 0) {
20227       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
20228       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
20229         ElementCount NewExtEC =
20230             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
20231         EVT ScalarVT = SrcVT.getScalarType();
20232         if ((ExtIdx % DestSrcRatio) == 0) {
20233           SDLoc DL(N);
20234           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
20235           EVT NewExtVT =
20236               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
20237           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
20238             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20239             SDValue NewExtract =
20240                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
20241                             V.getOperand(0), NewIndex);
20242             return DAG.getBitcast(NVT, NewExtract);
20243           }
20244           if (NewExtEC.isScalar() &&
20245               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
20246             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
20247             SDValue NewExtract =
20248                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
20249                             V.getOperand(0), NewIndex);
20250             return DAG.getBitcast(NVT, NewExtract);
20251           }
20252         }
20253       }
20254     }
20255   }
20256 
20257   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
20258     unsigned ExtNumElts = NVT.getVectorMinNumElements();
20259     EVT ConcatSrcVT = V.getOperand(0).getValueType();
20260     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
20261            "Concat and extract subvector do not change element type");
20262     assert((ExtIdx % ExtNumElts) == 0 &&
20263            "Extract index is not a multiple of the input vector length.");
20264 
20265     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
20266     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
20267 
20268     // If the concatenated source types match this extract, it's a direct
20269     // simplification:
20270     // extract_subvec (concat V1, V2, ...), i --> Vi
20271     if (ConcatSrcNumElts == ExtNumElts)
20272       return V.getOperand(ConcatOpIdx);
20273 
20274     // If the concatenated source vectors are a multiple length of this extract,
20275     // then extract a fraction of one of those source vectors directly from a
20276     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec v8i8 Y, 6
20279     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
20280       SDLoc DL(N);
20281       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
20282       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
20283              "Trying to extract from >1 concat operand?");
20284       assert(NewExtIdx % ExtNumElts == 0 &&
20285              "Extract index is not a multiple of the input vector length.");
20286       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
20287       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
20288                          V.getOperand(ConcatOpIdx), NewIndexC);
20289     }
20290   }
20291 
20292   V = peekThroughBitcasts(V);
20293 
  // If the input is a build vector, try to make a smaller build vector.
20295   if (V.getOpcode() == ISD::BUILD_VECTOR) {
20296     EVT InVT = V.getValueType();
20297     unsigned ExtractSize = NVT.getSizeInBits();
20298     unsigned EltSize = InVT.getScalarSizeInBits();
20299     // Only do this if we won't split any elements.
20300     if (ExtractSize % EltSize == 0) {
20301       unsigned NumElems = ExtractSize / EltSize;
20302       EVT EltVT = InVT.getVectorElementType();
20303       EVT ExtractVT =
20304           NumElems == 1 ? EltVT
20305                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
20306       if ((Level < AfterLegalizeDAG ||
20307            (NumElems == 1 ||
20308             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
20309           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
20310         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
20311 
20312         if (NumElems == 1) {
20313           SDValue Src = V->getOperand(IdxVal);
20314           if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
20316           return DAG.getBitcast(NVT, Src);
20317         }
20318 
20319         // Extract the pieces from the original build_vector.
20320         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
20321                                               V->ops().slice(IdxVal, NumElems));
20322         return DAG.getBitcast(NVT, BuildVec);
20323       }
20324     }
20325   }
20326 
20327   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
20328     // Handle only simple case where vector being inserted and vector
20329     // being extracted are of same size.
20330     EVT SmallVT = V.getOperand(1).getValueType();
20331     if (!NVT.bitsEq(SmallVT))
20332       return SDValue();
20333 
20334     // Combine:
20335     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20336     // Into:
20337     //    indices are equal or bit offsets are equal => V1
20338     //    otherwise => (extract_subvec V1, ExtIdx)
20339     uint64_t InsIdx = V.getConstantOperandVal(2);
20340     if (InsIdx * SmallVT.getScalarSizeInBits() ==
20341         ExtIdx * NVT.getScalarSizeInBits())
20342       return DAG.getBitcast(NVT, V.getOperand(1));
20343     return DAG.getNode(
20344         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20345         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20346         N->getOperand(1));
20347   }
20348 
20349   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20350     return NarrowBOp;
20351 
20352   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20353     return SDValue(N, 0);
20354 
20355   return SDValue();
20356 }
20357 
20358 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20359 /// followed by concatenation. Narrow vector ops may have better performance
20360 /// than wide ops, and this can unlock further narrowing of other vector ops.
20361 /// Targets can invert this transform later if it is not profitable.
20362 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20363                                          SelectionDAG &DAG) {
20364   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20365   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20366       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20367       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20368     return SDValue();
20369 
20370   // Split the wide shuffle mask into halves. Any mask element that is accessing
20371   // operand 1 is offset down to account for narrowing of the vectors.
20372   ArrayRef<int> Mask = Shuf->getMask();
20373   EVT VT = Shuf->getValueType(0);
20374   unsigned NumElts = VT.getVectorNumElements();
20375   unsigned HalfNumElts = NumElts / 2;
20376   SmallVector<int, 16> Mask0(HalfNumElts, -1);
20377   SmallVector<int, 16> Mask1(HalfNumElts, -1);
20378   for (unsigned i = 0; i != NumElts; ++i) {
20379     if (Mask[i] == -1)
20380       continue;
20381     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20382     if (i < HalfNumElts)
20383       Mask0[i] = M;
20384     else
20385       Mask1[i - HalfNumElts] = M;
20386   }
20387 
20388   // Ask the target if this is a valid transform.
20389   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20390   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20391                                 HalfNumElts);
20392   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20393       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20394     return SDValue();
20395 
20396   // shuffle (concat X, undef), (concat Y, undef), Mask -->
20397   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20398   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20399   SDLoc DL(Shuf);
20400   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20401   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20402   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20403 }
20404 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
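// E.g. shuffle<0,1,6,7> (concat A, B), (concat C, D) --> concat A, D,
// where A, B, C and D stand for subvector-sized operands.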
20407 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20408   EVT VT = N->getValueType(0);
20409   unsigned NumElts = VT.getVectorNumElements();
20410 
20411   SDValue N0 = N->getOperand(0);
20412   SDValue N1 = N->getOperand(1);
20413   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20414   ArrayRef<int> Mask = SVN->getMask();
20415 
20416   SmallVector<SDValue, 4> Ops;
20417   EVT ConcatVT = N0.getOperand(0).getValueType();
20418   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20419   unsigned NumConcats = NumElts / NumElemsPerConcat;
20420 
20421   auto IsUndefMaskElt = [](int i) { return i == -1; };
20422 
20423   // Special case: shuffle(concat(A,B)) can be more efficiently represented
20424   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20425   // half vector elements.
20426   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20427       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20428                    IsUndefMaskElt)) {
20429     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20430                               N0.getOperand(1),
20431                               Mask.slice(0, NumElemsPerConcat));
20432     N1 = DAG.getUNDEF(ConcatVT);
20433     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20434   }
20435 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
20438   for (unsigned I = 0; I != NumConcats; ++I) {
20439     unsigned Begin = I * NumElemsPerConcat;
20440     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20441 
20442     // Make sure we're dealing with a copy.
20443     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20444       Ops.push_back(DAG.getUNDEF(ConcatVT));
20445       continue;
20446     }
20447 
20448     int OpIdx = -1;
20449     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20450       if (IsUndefMaskElt(SubMask[i]))
20451         continue;
20452       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20453         return SDValue();
20454       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20455       if (0 <= OpIdx && EltOpIdx != OpIdx)
20456         return SDValue();
20457       OpIdx = EltOpIdx;
20458     }
20459     assert(0 <= OpIdx && "Unknown concat_vectors op");
20460 
20461     if (OpIdx < (int)N0.getNumOperands())
20462       Ops.push_back(N0.getOperand(OpIdx));
20463     else
20464       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20465   }
20466 
20467   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20468 }
20469 
20470 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20471 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20472 //
20473 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20474 // a simplification in some sense, but it isn't appropriate in general: some
20475 // BUILD_VECTORs are substantially cheaper than others. The general case
20476 // of a BUILD_VECTOR requires inserting each element individually (or
20477 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20478 // all constants is a single constant pool load.  A BUILD_VECTOR where each
20479 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
20480 // are undef lowers to a small number of element insertions.
20481 //
20482 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20483 // We don't fold shuffles where one side is a non-zero constant, and we don't
20484 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20485 // non-constant operands. This seems to work out reasonably well in practice.
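//
// An illustrative example of the basic fold:
//   shuffle (build_vector a,b,c,d), (build_vector e,f,g,h), <0,4,1,5>
//     --> build_vector a,e,b,f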
20486 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20487                                        SelectionDAG &DAG,
20488                                        const TargetLowering &TLI) {
20489   EVT VT = SVN->getValueType(0);
20490   unsigned NumElts = VT.getVectorNumElements();
20491   SDValue N0 = SVN->getOperand(0);
20492   SDValue N1 = SVN->getOperand(1);
20493 
20494   if (!N0->hasOneUse())
20495     return SDValue();
20496 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS, as
  // discussed above.
20499   if (!N1.isUndef()) {
20500     if (!N1->hasOneUse())
20501       return SDValue();
20502 
20503     bool N0AnyConst = isAnyConstantBuildVector(N0);
20504     bool N1AnyConst = isAnyConstantBuildVector(N1);
20505     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20506       return SDValue();
20507     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20508       return SDValue();
20509   }
20510 
20511   // If both inputs are splats of the same value then we can safely merge this
20512   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20513   bool IsSplat = false;
20514   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20515   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20516   if (BV0 && BV1)
20517     if (SDValue Splat0 = BV0->getSplatValue())
20518       IsSplat = (Splat0 == BV1->getSplatValue());
20519 
20520   SmallVector<SDValue, 8> Ops;
20521   SmallSet<SDValue, 16> DuplicateOps;
20522   for (int M : SVN->getMask()) {
20523     SDValue Op = DAG.getUNDEF(VT.getScalarType());
20524     if (M >= 0) {
20525       int Idx = M < (int)NumElts ? M : M - NumElts;
20526       SDValue &S = (M < (int)NumElts ? N0 : N1);
20527       if (S.getOpcode() == ISD::BUILD_VECTOR) {
20528         Op = S.getOperand(Idx);
20529       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20530         SDValue Op0 = S.getOperand(0);
20531         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20532       } else {
20533         // Operand can't be combined - bail out.
20534         return SDValue();
20535       }
20536     }
20537 
20538     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20539     // generating a splat; semantically, this is fine, but it's likely to
20540     // generate low-quality code if the target can't reconstruct an appropriate
20541     // shuffle.
20542     if (!Op.isUndef() && !isIntOrFPConstant(Op))
20543       if (!IsSplat && !DuplicateOps.insert(Op).second)
20544         return SDValue();
20545 
20546     Ops.push_back(Op);
20547   }
20548 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
  // maximum type and extend them all.
20551   EVT SVT = VT.getScalarType();
20552   if (SVT.isInteger())
20553     for (SDValue &Op : Ops)
20554       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20555   if (SVT != VT.getScalarType())
20556     for (SDValue &Op : Ops)
20557       Op = TLI.isZExtFree(Op.getValueType(), SVT)
20558                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20559                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20560   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20561 }
20562 
20563 // Match shuffles that can be converted to any_vector_extend_in_reg.
20564 // This is often generated during legalization.
20565 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20566 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
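// Another illustrative case, for Scale == 4:
// e.g. v8i16 <0,u,u,u,1,u,u,u> -> (v2i64 any_vector_extend_in_reg(v8i16 src))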
20567 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20568                                             SelectionDAG &DAG,
20569                                             const TargetLowering &TLI,
20570                                             bool LegalOperations) {
20571   EVT VT = SVN->getValueType(0);
20572   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20573 
20574   // TODO Add support for big-endian when we have a test case.
20575   if (!VT.isInteger() || IsBigEndian)
20576     return SDValue();
20577 
20578   unsigned NumElts = VT.getVectorNumElements();
20579   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20580   ArrayRef<int> Mask = SVN->getMask();
20581   SDValue N0 = SVN->getOperand(0);
20582 
20583   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20584   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20585     for (unsigned i = 0; i != NumElts; ++i) {
20586       if (Mask[i] < 0)
20587         continue;
20588       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20589         continue;
20590       return false;
20591     }
20592     return true;
20593   };
20594 
  // Attempt to match a '*_extend_vector_inreg' shuffle. We only search for
  // power-of-2 extensions as they are the most likely.
20597   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
    // Check for non-power-of-2 vector sizes.
20599     if (NumElts % Scale != 0)
20600       continue;
20601     if (!isAnyExtend(Scale))
20602       continue;
20603 
20604     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20605     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20606     // Never create an illegal type. Only create unsupported operations if we
20607     // are pre-legalization.
20608     if (TLI.isTypeLegal(OutVT))
20609       if (!LegalOperations ||
20610           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20611         return DAG.getBitcast(VT,
20612                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20613                                           SDLoc(SVN), OutVT, N0));
20614   }
20615 
20616   return SDValue();
20617 }
20618 
20619 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20620 // each source element of a large type into the lowest elements of a smaller
20621 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
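// Illustrative example: if N0 (seen through a bitcast to v4i32) is
// (v2i64 zero_extend_vector_inreg(v4i32 X)), then ExtScale = 64/32 = 2, and a
// v4i32 shuffle mask of <0,2,-1,-1> selects exactly the low i32 of each
// extended element, so the whole pattern folds to a bitcast of X.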
20624 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20625                                         SelectionDAG &DAG) {
20626   EVT VT = SVN->getValueType(0);
20627   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20628 
20629   // TODO Add support for big-endian when we have a test case.
20630   if (!VT.isInteger() || IsBigEndian)
20631     return SDValue();
20632 
20633   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20634 
20635   unsigned Opcode = N0.getOpcode();
20636   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20637       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20638       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20639     return SDValue();
20640 
20641   SDValue N00 = N0.getOperand(0);
20642   ArrayRef<int> Mask = SVN->getMask();
20643   unsigned NumElts = VT.getVectorNumElements();
20644   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20645   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20646   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20647 
20648   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20649     return SDValue();
20650   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20651 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20653   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20654   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20655   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20656     for (unsigned i = 0; i != NumElts; ++i) {
20657       if (Mask[i] < 0)
20658         continue;
20659       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20660         continue;
20661       return false;
20662     }
20663     return true;
20664   };
20665 
  // At the moment we just handle the case where we've truncated back to the
  // same size as before the extension.
  // TODO: handle more extension/truncation cases as they arise.
20669   if (EltSizeInBits != ExtSrcSizeInBits)
20670     return SDValue();
20671 
20672   // We can remove *extend_vector_inreg only if the truncation happens at
20673   // the same scale as the extension.
20674   if (isTruncate(ExtScale))
20675     return DAG.getBitcast(VT, N00);
20676 
20677   return SDValue();
20678 }
20679 
20680 // Combine shuffles of splat-shuffles of the form:
20681 // shuffle (shuffle V, undef, splat-mask), undef, M
20682 // If splat-mask contains undef elements, we need to be careful about
20683 // introducing undef's in the folded mask which are not the result of composing
20684 // the masks of the shuffles.
20685 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20686                                         SelectionDAG &DAG) {
20687   if (!Shuf->getOperand(1).isUndef())
20688     return SDValue();
20689   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20690   if (!Splat || !Splat->isSplat())
20691     return SDValue();
20692 
20693   ArrayRef<int> ShufMask = Shuf->getMask();
20694   ArrayRef<int> SplatMask = Splat->getMask();
20695   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
20696 
20697   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20698   // every undef mask element in the splat-shuffle has a corresponding undef
20699   // element in the user-shuffle's mask or if the composition of mask elements
20700   // would result in undef.
20701   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20702   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because
  //   we may be exposing an undef element at index 1 to users of the shuffle,
  //   which was not there before the combine.
20706   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20707   //   In this case the composition of masks yields SplatMask, so it's ok to
20708   //   simplify to the splat-shuffle.
20709   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20710   //   In this case the composed mask includes all undef elements of SplatMask
20711   //   and in addition sets element zero to undef. It is safe to simplify to
20712   //   the splat-shuffle.
20713   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20714                                        ArrayRef<int> SplatMask) {
20715     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20716       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20717           SplatMask[UserMask[i]] != -1)
20718         return false;
20719     return true;
20720   };
20721   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20722     return Shuf->getOperand(0);
20723 
20724   // Create a new shuffle with a mask that is composed of the two shuffles'
20725   // masks.
20726   SmallVector<int, 32> NewMask;
20727   for (int Idx : ShufMask)
20728     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20729 
20730   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20731                               Splat->getOperand(0), Splat->getOperand(1),
20732                               NewMask);
20733 }
20734 
20735 /// Combine shuffle of shuffle of the form:
20736 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
20737 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20738                                      SelectionDAG &DAG) {
20739   if (!OuterShuf->getOperand(1).isUndef())
20740     return SDValue();
20741   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20742   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20743     return SDValue();
20744 
20745   ArrayRef<int> OuterMask = OuterShuf->getMask();
20746   ArrayRef<int> InnerMask = InnerShuf->getMask();
20747   unsigned NumElts = OuterMask.size();
20748   assert(NumElts == InnerMask.size() && "Mask length mismatch");
20749   SmallVector<int, 32> CombinedMask(NumElts, -1);
20750   int SplatIndex = -1;
20751   for (unsigned i = 0; i != NumElts; ++i) {
20752     // Undef lanes remain undef.
20753     int OuterMaskElt = OuterMask[i];
20754     if (OuterMaskElt == -1)
20755       continue;
20756 
20757     // Peek through the shuffle masks to get the underlying source element.
20758     int InnerMaskElt = InnerMask[OuterMaskElt];
20759     if (InnerMaskElt == -1)
20760       continue;
20761 
20762     // Initialize the splatted element.
20763     if (SplatIndex == -1)
20764       SplatIndex = InnerMaskElt;
20765 
20766     // Non-matching index - this is not a splat.
20767     if (SplatIndex != InnerMaskElt)
20768       return SDValue();
20769 
20770     CombinedMask[i] = InnerMaskElt;
20771   }
20772   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
20773           getSplatIndex(CombinedMask) != -1) &&
20774          "Expected a splat mask");
20775 
20776   // TODO: The transform may be a win even if the mask is not legal.
20777   EVT VT = OuterShuf->getValueType(0);
20778   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
20779   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20780     return SDValue();
20781 
20782   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20783                               InnerShuf->getOperand(1), CombinedMask);
20784 }
20785 
20786 /// If the shuffle mask is taking exactly one element from the first vector
20787 /// operand and passing through all other elements from the second vector
20788 /// operand, return the index of the mask element that is choosing an element
20789 /// from the first operand. Otherwise, return -1.
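/// Illustrative example: for a 4-element mask, <4,1,6,7> takes only element 1
/// from operand 0 and passes lanes 0, 2 and 3 through from operand 1, so this
/// returns 1. Masks such as <0,1,6,7> (two elements from operand 0) or
/// <4,1,6,5> (an operand 1 element changing lanes) return -1.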
20790 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20791   int MaskSize = Mask.size();
20792   int EltFromOp0 = -1;
20793   // TODO: This does not match if there are undef elements in the shuffle mask.
20794   // Should we ignore undefs in the shuffle mask instead? The trade-off is
20795   // removing an instruction (a shuffle), but losing the knowledge that some
20796   // vector lanes are not needed.
20797   for (int i = 0; i != MaskSize; ++i) {
20798     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20799       // We're looking for a shuffle of exactly one element from operand 0.
20800       if (EltFromOp0 != -1)
20801         return -1;
20802       EltFromOp0 = i;
20803     } else if (Mask[i] != i + MaskSize) {
20804       // Nothing from operand 1 can change lanes.
20805       return -1;
20806     }
20807   }
20808   return EltFromOp0;
20809 }
20810 
20811 /// If a shuffle inserts exactly one element from a source vector operand into
20812 /// another vector operand and we can access the specified element as a scalar,
20813 /// then we can eliminate the shuffle.
20814 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
20815                                       SelectionDAG &DAG) {
20816   // First, check if we are taking one element of a vector and shuffling that
20817   // element into another vector.
20818   ArrayRef<int> Mask = Shuf->getMask();
20819   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
20820   SDValue Op0 = Shuf->getOperand(0);
20821   SDValue Op1 = Shuf->getOperand(1);
20822   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
20823   if (ShufOp0Index == -1) {
20824     // Commute mask and check again.
20825     ShuffleVectorSDNode::commuteMask(CommutedMask);
20826     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
20827     if (ShufOp0Index == -1)
20828       return SDValue();
20829     // Commute operands to match the commuted shuffle mask.
20830     std::swap(Op0, Op1);
20831     Mask = CommutedMask;
20832   }
20833 
20834   // The shuffle inserts exactly one element from operand 0 into operand 1.
20835   // Now see if we can access that element as a scalar via a real insert element
20836   // instruction.
20837   // TODO: We can try harder to locate the element as a scalar. Examples: it
20838   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20839   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
20840          "Shuffle mask value must be from operand 0");
20841   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20842     return SDValue();
20843 
20844   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20845   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20846     return SDValue();
20847 
20848   // There's an existing insertelement with constant insertion index, so we
20849   // don't need to check the legality/profitability of a replacement operation
20850   // that differs at most in the constant value. The target should be able to
20851   // lower any of those in a similar way. If not, legalization will expand this
20852   // to a scalar-to-vector plus shuffle.
20853   //
20854   // Note that the shuffle may move the scalar from the position that the insert
20855   // element used. Therefore, our new insert element occurs at the shuffle's
20856   // mask index value, not the insert's index value.
20857   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
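  // For example (illustrative):
  //   shuffle (insertelt v1, x, 0), v2, <4,0,6,7> --> insertelt v2, x, 1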
20858   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20859   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20860                      Op1, Op0.getOperand(1), NewInsIndex);
20861 }
20862 
20863 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20864 /// completely. This has the potential to lose undef knowledge because the first
20865 /// shuffle may not have an undef mask element where the second one does. So
20866 /// only call this after doing simplifications based on demanded elements.
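/// Illustrative example: with inner mask Mask0 = <1,1,2,3> and outer mask
/// <1,0,-1,3>, every demanded lane satisfies Mask0[Mask[i]] == Mask0[i], so
/// the outer shuffle can be replaced by the inner shuffle's result.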
20867 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20868   // shuf (shuf0 X, Y, Mask0), undef, Mask
20869   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20870   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20871     return SDValue();
20872 
20873   ArrayRef<int> Mask = Shuf->getMask();
20874   ArrayRef<int> Mask0 = Shuf0->getMask();
20875   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20876     // Ignore undef elements.
20877     if (Mask[i] == -1)
20878       continue;
20879     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20880 
20881     // Is the element of the shuffle operand chosen by this shuffle the same as
20882     // the element chosen by the shuffle operand itself?
20883     if (Mask0[Mask[i]] != Mask0[i])
20884       return SDValue();
20885   }
20886   // Every element of this shuffle is identical to the result of the previous
20887   // shuffle, so we can replace this value.
20888   return Shuf->getOperand(0);
20889 }
20890 
20891 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20892   EVT VT = N->getValueType(0);
20893   unsigned NumElts = VT.getVectorNumElements();
20894 
20895   SDValue N0 = N->getOperand(0);
20896   SDValue N1 = N->getOperand(1);
20897 
20898   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20899 
20900   // Canonicalize shuffle undef, undef -> undef
20901   if (N0.isUndef() && N1.isUndef())
20902     return DAG.getUNDEF(VT);
20903 
20904   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20905 
20906   // Canonicalize shuffle v, v -> v, undef
20907   if (N0 == N1) {
20908     SmallVector<int, 8> NewMask;
20909     for (unsigned i = 0; i != NumElts; ++i) {
20910       int Idx = SVN->getMaskElt(i);
20911       if (Idx >= (int)NumElts) Idx -= NumElts;
20912       NewMask.push_back(Idx);
20913     }
20914     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20915   }
20916 
20917   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
20918   if (N0.isUndef())
20919     return DAG.getCommutedVectorShuffle(*SVN);
20920 
  // Remove references to the RHS if it is undef.
20922   if (N1.isUndef()) {
20923     bool Changed = false;
20924     SmallVector<int, 8> NewMask;
20925     for (unsigned i = 0; i != NumElts; ++i) {
20926       int Idx = SVN->getMaskElt(i);
20927       if (Idx >= (int)NumElts) {
20928         Idx = -1;
20929         Changed = true;
20930       }
20931       NewMask.push_back(Idx);
20932     }
20933     if (Changed)
20934       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20935   }
20936 
20937   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20938     return InsElt;
20939 
20940   // A shuffle of a single vector that is a splatted value can always be folded.
20941   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20942     return V;
20943 
20944   if (SDValue V = formSplatFromShuffles(SVN, DAG))
20945     return V;
20946 
20947   // If it is a splat, check if the argument vector is another splat or a
20948   // build_vector.
20949   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20950     int SplatIndex = SVN->getSplatIndex();
20951     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20952         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20953       // splat (vector_bo L, R), Index -->
20954       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
20955       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20956       SDLoc DL(N);
20957       EVT EltVT = VT.getScalarType();
20958       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20959       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20960       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20961       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20962                                   N0.getNode()->getFlags());
20963       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20964       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20965       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20966     }
20967 
20968     // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
20971     SDNode *V = N0.getNode();
20972     if (V->getOpcode() == ISD::BITCAST) {
20973       SDValue ConvInput = V->getOperand(0);
20974       if (ConvInput.getValueType().isVector() &&
20975           ConvInput.getValueType().getVectorNumElements() == NumElts)
20976         V = ConvInput.getNode();
20977     }
20978 
20979     if (V->getOpcode() == ISD::BUILD_VECTOR) {
20980       assert(V->getNumOperands() == NumElts &&
20981              "BUILD_VECTOR has wrong number of operands");
20982       SDValue Base;
20983       bool AllSame = true;
20984       for (unsigned i = 0; i != NumElts; ++i) {
20985         if (!V->getOperand(i).isUndef()) {
20986           Base = V->getOperand(i);
20987           break;
20988         }
20989       }
20990       // Splat of <u, u, u, u>, return <u, u, u, u>
20991       if (!Base.getNode())
20992         return N0;
20993       for (unsigned i = 0; i != NumElts; ++i) {
20994         if (V->getOperand(i) != Base) {
20995           AllSame = false;
20996           break;
20997         }
20998       }
20999       // Splat of <x, x, x, x>, return <x, x, x, x>
21000       if (AllSame)
21001         return N0;
21002 
21003       // Canonicalize any other splat as a build_vector.
21004       SDValue Splatted = V->getOperand(SplatIndex);
21005       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
21006       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
21007 
21008       // We may have jumped through bitcasts, so the type of the
21009       // BUILD_VECTOR may not match the type of the shuffle.
21010       if (V->getValueType(0) != VT)
21011         NewBV = DAG.getBitcast(VT, NewBV);
21012       return NewBV;
21013     }
21014   }
21015 
21016   // Simplify source operands based on shuffle mask.
21017   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21018     return SDValue(N, 0);
21019 
  // This fold is intentionally placed after the demanded-elements
  // simplification because it could eliminate knowledge of undef elements
  // created by this shuffle.
21022   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
21023     return ShufOp;
21024 
21025   // Match shuffles that can be converted to any_vector_extend_in_reg.
21026   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
21027     return V;
21028 
  // Combine "truncate_vector_inreg" style shuffles.
21030   if (SDValue V = combineTruncationShuffle(SVN, DAG))
21031     return V;
21032 
21033   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
21034       Level < AfterLegalizeVectorOps &&
21035       (N1.isUndef() ||
21036       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
21037        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
21038     if (SDValue V = partitionShuffleOfConcats(N, DAG))
21039       return V;
21040   }
21041 
21042   // A shuffle of a concat of the same narrow vector can be reduced to use
21043   // only low-half elements of a concat with undef:
21044   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
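  // For example (illustrative): shuf (concat X, X), undef, <0,3,1,2>
  //   --> shuf (concat X, undef), undef, <0,1,1,0>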
21045   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
21046       N0.getNumOperands() == 2 &&
21047       N0.getOperand(0) == N0.getOperand(1)) {
21048     int HalfNumElts = (int)NumElts / 2;
21049     SmallVector<int, 8> NewMask;
21050     for (unsigned i = 0; i != NumElts; ++i) {
21051       int Idx = SVN->getMaskElt(i);
21052       if (Idx >= HalfNumElts) {
21053         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
21054         Idx -= HalfNumElts;
21055       }
21056       NewMask.push_back(Idx);
21057     }
21058     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
21059       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
21060       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
21061                                    N0.getOperand(0), UndefVec);
21062       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
21063     }
21064   }
21065 
21066   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
21067   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
21068   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
21069     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
21070       return Res;
21071 
21072   // If this shuffle only has a single input that is a bitcasted shuffle,
21073   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
21074   // back to their original types.
21075   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
21076       N1.isUndef() && Level < AfterLegalizeVectorOps &&
21077       TLI.isTypeLegal(VT)) {
21078 
21079     SDValue BC0 = peekThroughOneUseBitcasts(N0);
21080     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
21081       EVT SVT = VT.getScalarType();
21082       EVT InnerVT = BC0->getValueType(0);
21083       EVT InnerSVT = InnerVT.getScalarType();
21084 
21085       // Determine which shuffle works with the smaller scalar type.
21086       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
21087       EVT ScaleSVT = ScaleVT.getScalarType();
21088 
21089       if (TLI.isTypeLegal(ScaleVT) &&
21090           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
21091           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
21092         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21093         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
21094 
21095         // Scale the shuffle masks to the smaller scalar type.
21096         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
21097         SmallVector<int, 8> InnerMask;
21098         SmallVector<int, 8> OuterMask;
21099         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
21100         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
21101 
21102         // Merge the shuffle masks.
21103         SmallVector<int, 8> NewMask;
21104         for (int M : OuterMask)
21105           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
21106 
21107         // Test for shuffle mask legality over both commutations.
21108         SDValue SV0 = BC0->getOperand(0);
21109         SDValue SV1 = BC0->getOperand(1);
21110         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21111         if (!LegalMask) {
21112           std::swap(SV0, SV1);
21113           ShuffleVectorSDNode::commuteMask(NewMask);
21114           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
21115         }
21116 
21117         if (LegalMask) {
21118           SV0 = DAG.getBitcast(ScaleVT, SV0);
21119           SV1 = DAG.getBitcast(ScaleVT, SV1);
21120           return DAG.getBitcast(
21121               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
21122         }
21123       }
21124     }
21125   }
21126 
21127   // Compute the combined shuffle mask for a shuffle with SV0 as the first
21128   // operand, and SV1 as the second operand.
21129   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
21130   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
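  // Illustrative example (Commute = false): if SVN is
  // shuffle(OtherSVN, C, <0,5,2,7>) with OtherSVN = shuffle(A, B, <0,4,1,5>),
  // then peeking through the inner mask gives SV0 = A, SV1 = C and the merged
  // shuffle(A, C, <0,5,1,7>).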
21131   auto MergeInnerShuffle =
21132       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
21133                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
21134                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
21135                      SmallVectorImpl<int> &Mask) -> bool {
21136     // Don't try to fold splats; they're likely to simplify somehow, or they
21137     // might be free.
21138     if (OtherSVN->isSplat())
21139       return false;
21140 
21141     SV0 = SV1 = SDValue();
21142     Mask.clear();
21143 
21144     for (unsigned i = 0; i != NumElts; ++i) {
21145       int Idx = SVN->getMaskElt(i);
21146       if (Idx < 0) {
21147         // Propagate Undef.
21148         Mask.push_back(Idx);
21149         continue;
21150       }
21151 
21152       if (Commute)
21153         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
21154 
21155       SDValue CurrentVec;
21156       if (Idx < (int)NumElts) {
21157         // This shuffle index refers to the inner shuffle N0. Lookup the inner
21158         // shuffle mask to identify which vector is actually referenced.
21159         Idx = OtherSVN->getMaskElt(Idx);
21160         if (Idx < 0) {
21161           // Propagate Undef.
21162           Mask.push_back(Idx);
21163           continue;
21164         }
21165         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
21166                                           : OtherSVN->getOperand(1);
21167       } else {
21168         // This shuffle index references an element within N1.
21169         CurrentVec = N1;
21170       }
21171 
21172       // Simple case where 'CurrentVec' is UNDEF.
21173       if (CurrentVec.isUndef()) {
21174         Mask.push_back(-1);
21175         continue;
21176       }
21177 
21178       // Canonicalize the shuffle index. We don't know yet if CurrentVec
21179       // will be the first or second operand of the combined shuffle.
21180       Idx = Idx % NumElts;
21181       if (!SV0.getNode() || SV0 == CurrentVec) {
21182         // Ok. CurrentVec is the left hand side.
21183         // Update the mask accordingly.
21184         SV0 = CurrentVec;
21185         Mask.push_back(Idx);
21186         continue;
21187       }
21188       if (!SV1.getNode() || SV1 == CurrentVec) {
21189         // Ok. CurrentVec is the right hand side.
21190         // Update the mask accordingly.
21191         SV1 = CurrentVec;
21192         Mask.push_back(Idx + NumElts);
21193         continue;
21194       }
21195 
21196       // Last chance - see if the vector is another shuffle and if it
21197       // uses one of the existing candidate shuffle ops.
21198       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
21199         int InnerIdx = CurrentSVN->getMaskElt(Idx);
21200         if (InnerIdx < 0) {
21201           Mask.push_back(-1);
21202           continue;
21203         }
21204         SDValue InnerVec = (InnerIdx < (int)NumElts)
21205                                ? CurrentSVN->getOperand(0)
21206                                : CurrentSVN->getOperand(1);
21207         if (InnerVec.isUndef()) {
21208           Mask.push_back(-1);
21209           continue;
21210         }
21211         InnerIdx %= NumElts;
21212         if (InnerVec == SV0) {
21213           Mask.push_back(InnerIdx);
21214           continue;
21215         }
21216         if (InnerVec == SV1) {
21217           Mask.push_back(InnerIdx + NumElts);
21218           continue;
21219         }
21220       }
21221 
21222       // Bail out if we cannot convert the shuffle pair into a single shuffle.
21223       return false;
21224     }
21225 
21226     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21227       return true;
21228 
21229     // Avoid introducing shuffles with illegal mask.
21230     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21231     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21232     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21233     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
21234     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
21235     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
21236     if (TLI.isShuffleMaskLegal(Mask, VT))
21237       return true;
21238 
21239     std::swap(SV0, SV1);
21240     ShuffleVectorSDNode::commuteMask(Mask);
21241     return TLI.isShuffleMaskLegal(Mask, VT);
21242   };
21243 
21244   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
21245     // Canonicalize shuffles according to rules:
21246     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
21247     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
21248     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
21249     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21250         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
21251       // The incoming shuffle must be of the same type as the result of the
21252       // current shuffle.
21253       assert(N1->getOperand(0).getValueType() == VT &&
21254              "Shuffle types don't match");
21255 
21256       SDValue SV0 = N1->getOperand(0);
21257       SDValue SV1 = N1->getOperand(1);
21258       bool HasSameOp0 = N0 == SV0;
21259       bool IsSV1Undef = SV1.isUndef();
21260       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
21261         // Commute the operands of this shuffle so merging below will trigger.
21262         return DAG.getCommutedVectorShuffle(*SVN);
21263     }
21264 
21265     // Canonicalize splat shuffles to the RHS to improve merging below.
21266     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
21267     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
21268         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
21269         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
21270         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
21271       return DAG.getCommutedVectorShuffle(*SVN);
21272     }
21273 
21274     // Try to fold according to rules:
21275     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
21276     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
21277     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
21278     // Don't try to fold shuffles with illegal type.
21279     // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
21281     for (int i = 0; i != 2; ++i) {
21282       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
21283           N->isOnlyUserOf(N->getOperand(i).getNode())) {
21284         // The incoming shuffle must be of the same type as the result of the
21285         // current shuffle.
21286         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
21287         assert(OtherSV->getOperand(0).getValueType() == VT &&
21288                "Shuffle types don't match");
21289 
21290         SDValue SV0, SV1;
21291         SmallVector<int, 4> Mask;
21292         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
21293                               SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
21295           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
21296             return DAG.getUNDEF(VT);
21297 
21298           return DAG.getVectorShuffle(VT, SDLoc(N),
21299                                       SV0 ? SV0 : DAG.getUNDEF(VT),
21300                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
21301         }
21302       }
21303     }
21304 
    // Merge shuffles through binops if we are able to merge them with at
    // least one other shuffle.
21307     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
21308     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
21309     unsigned SrcOpcode = N0.getOpcode();
21310     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
21311         (N1.isUndef() ||
21312          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
21313       // Get binop source ops, or just pass on the undef.
21314       SDValue Op00 = N0.getOperand(0);
21315       SDValue Op01 = N0.getOperand(1);
21316       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
21317       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
      // TODO: We might be able to relax the VT check, but we don't currently
      // have any isBinOp() that has different result/ops VTs, so play it safe
      // until we have test coverage.
21321       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
21322           Op01.getValueType() == VT && Op11.getValueType() == VT &&
21323           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
21324            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
21325            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
21326            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
21327         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
21328                                         SmallVectorImpl<int> &Mask, bool LeftOp,
21329                                         bool Commute) {
21330           SDValue InnerN = Commute ? N1 : N0;
21331           SDValue Op0 = LeftOp ? Op00 : Op01;
21332           SDValue Op1 = LeftOp ? Op10 : Op11;
21333           if (Commute)
21334             std::swap(Op0, Op1);
21335           // Only accept the merged shuffle if we don't introduce undef elements,
21336           // or the inner shuffle already contained undef elements.
21337           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
21338           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
21339                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
21340                                    Mask) &&
21341                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
21342                   llvm::none_of(Mask, [](int M) { return M < 0; }));
21343         };
21344 
21345         // Ensure we don't increase the number of shuffles - we must merge a
21346         // shuffle from at least one of the LHS and RHS ops.
21347         bool MergedLeft = false;
21348         SDValue LeftSV0, LeftSV1;
21349         SmallVector<int, 4> LeftMask;
21350         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
21351             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
21352           MergedLeft = true;
21353         } else {
21354           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21355           LeftSV0 = Op00, LeftSV1 = Op10;
21356         }
21357 
21358         bool MergedRight = false;
21359         SDValue RightSV0, RightSV1;
21360         SmallVector<int, 4> RightMask;
21361         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
21362             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
21363           MergedRight = true;
21364         } else {
21365           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
21366           RightSV0 = Op01, RightSV1 = Op11;
21367         }
21368 
21369         if (MergedLeft || MergedRight) {
21370           SDLoc DL(N);
21371           SDValue LHS = DAG.getVectorShuffle(
21372               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
21373               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
21374           SDValue RHS = DAG.getVectorShuffle(
21375               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
21376               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
21377           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
21378         }
21379       }
21380     }
21381   }
21382 
21383   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
21384     return V;
21385 
21386   return SDValue();
21387 }
21388 
21389 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
21390   SDValue InVal = N->getOperand(0);
21391   EVT VT = N->getValueType(0);
21392 
21393   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
21394   // with a VECTOR_SHUFFLE and possible truncate.
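  // For example (illustrative):
  //   scalar_to_vector (extract_vector_elt v4i32 V, 2)
  //     --> vector_shuffle V, undef, <2,-1,-1,-1>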
21395   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21396       VT.isFixedLengthVector() &&
21397       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
21398     SDValue InVec = InVal->getOperand(0);
21399     SDValue EltNo = InVal->getOperand(1);
21400     auto InVecT = InVec.getValueType();
21401     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
21402       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
21403       int Elt = C0->getZExtValue();
21404       NewMask[0] = Elt;
      // If we have an implicit truncate, do the truncate here as long as
      // it's legal.
21407       if (VT.getScalarType() != InVal.getValueType() &&
21408           InVal.getValueType().isScalarInteger() &&
21409           isTypeLegal(VT.getScalarType())) {
21410         SDValue Val =
21411             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
21412         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
21413       }
21414       if (VT.getScalarType() == InVecT.getScalarType() &&
21415           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
21416         SDValue LegalShuffle =
21417           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
21418                                       DAG.getUNDEF(InVecT), NewMask, DAG);
21419         if (LegalShuffle) {
21420           // If the initial vector is the correct size this shuffle is a
21421           // valid result.
21422           if (VT == InVecT)
21423             return LegalShuffle;
21424           // If not we must truncate the vector.
21425           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
21426             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
21427             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
21428                                          InVecT.getVectorElementType(),
21429                                          VT.getVectorNumElements());
21430             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
21431                                LegalShuffle, ZeroIdx);
21432           }
21433         }
21434       }
21435     }
21436   }
21437 
21438   return SDValue();
21439 }
21440 
21441 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
21442   EVT VT = N->getValueType(0);
21443   SDValue N0 = N->getOperand(0);
21444   SDValue N1 = N->getOperand(1);
21445   SDValue N2 = N->getOperand(2);
21446   uint64_t InsIdx = N->getConstantOperandVal(2);
21447 
21448   // If inserting an UNDEF, just return the original vector.
21449   if (N1.isUndef())
21450     return N0;
21451 
21452   // If this is an insert of an extracted vector into an undef vector, we can
21453   // just use the input to the extract.
21454   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21455       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
21456     return N1.getOperand(0);
21457 
21458   // If we are inserting a bitcast value into an undef, with the same
21459   // number of elements, just use the bitcast input of the extract.
21460   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
21461   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
21462   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21463       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21464       N1.getOperand(0).getOperand(1) == N2 &&
21465       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
21466           VT.getVectorElementCount() &&
21467       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21468           VT.getSizeInBits()) {
21469     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21470   }
21471 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
21474   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21475   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21476   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21477     SDValue CN0 = N0.getOperand(0);
21478     SDValue CN1 = N1.getOperand(0);
21479     EVT CN0VT = CN0.getValueType();
21480     EVT CN1VT = CN1.getValueType();
21481     if (CN0VT.isVector() && CN1VT.isVector() &&
21482         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21483         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
21484       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21485                                       CN0.getValueType(), CN0, CN1, N2);
21486       return DAG.getBitcast(VT, NewINSERT);
21487     }
21488   }
21489 
21490   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21491   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21492   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21493   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21494       N0.getOperand(1).getValueType() == N1.getValueType() &&
21495       N0.getOperand(2) == N2)
21496     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21497                        N1, N2);
21498 
21499   // Eliminate an intermediate insert into an undef vector:
21500   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21501   // insert_subvector undef, X, N2
21502   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21503       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21504     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21505                        N1.getOperand(1), N2);
21506 
21507   // Push subvector bitcasts to the output, adjusting the index as we go.
21508   // insert_subvector(bitcast(v), bitcast(s), c1)
21509   // -> bitcast(insert_subvector(v, s, c2))
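  // For example (illustrative): with VT = v4i32, N0 = (bitcast v2i64 V),
  // N1 = (v2i32 bitcast of v1i64 S), and InsIdx = 2, the scale is 64/32 = 2,
  // giving bitcast (insert_subvector v2i64 V, v1i64 S, 1).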
21510   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21511       N1.getOpcode() == ISD::BITCAST) {
21512     SDValue N0Src = peekThroughBitcasts(N0);
21513     SDValue N1Src = peekThroughBitcasts(N1);
21514     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21515     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21516     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21517         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21518       EVT NewVT;
21519       SDLoc DL(N);
21520       SDValue NewIdx;
21521       LLVMContext &Ctx = *DAG.getContext();
21522       ElementCount NumElts = VT.getVectorElementCount();
21523       unsigned EltSizeInBits = VT.getScalarSizeInBits();
21524       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21525         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21526         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21527         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21528       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21529         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21530         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21531           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21532                                    NumElts.divideCoefficientBy(Scale));
21533           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21534         }
21535       }
21536       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21537         SDValue Res = DAG.getBitcast(NewVT, N0Src);
21538         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21539         return DAG.getBitcast(VT, Res);
21540       }
21541     }
21542   }
21543 
21544   // Canonicalize insert_subvector dag nodes.
21545   // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1), Idx1 < Idx0
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
21548   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21549       N1.getValueType() == N0.getOperand(1).getValueType()) {
21550     unsigned OtherIdx = N0.getConstantOperandVal(2);
21551     if (InsIdx < OtherIdx) {
21552       // Swap nodes.
21553       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21554                                   N0.getOperand(0), N1, N2);
21555       AddToWorklist(NewOp.getNode());
21556       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21557                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21558     }
21559   }
21560 
21561   // If the input vector is a concatenation, and the insert replaces
21562   // one of the pieces, we can optimize into a single concat_vectors.
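  // For example (illustrative): inserting a v4i32 subvector N1 at index 8
  // into concat(A,B,C,D) of v4i32 pieces yields concat(A,B,N1,D).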
21563   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21564       N0.getOperand(0).getValueType() == N1.getValueType() &&
21565       N0.getOperand(0).getValueType().isScalableVector() ==
21566           N1.getValueType().isScalableVector()) {
21567     unsigned Factor = N1.getValueType().getVectorMinNumElements();
21568     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21569     Ops[InsIdx / Factor] = N1;
21570     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21571   }
21572 
21573   // Simplify source operands based on insertion.
21574   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21575     return SDValue(N, 0);
21576 
21577   return SDValue();
21578 }
21579 
21580 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21581   SDValue N0 = N->getOperand(0);
21582 
21583   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21584   if (N0->getOpcode() == ISD::FP16_TO_FP)
21585     return N0->getOperand(0);
21586 
21587   return SDValue();
21588 }
21589 
21590 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21591   SDValue N0 = N->getOperand(0);
21592 
21593   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21594   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
21595     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21596     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21597       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21598                          N0.getOperand(0));
21599     }
21600   }
21601 
21602   return SDValue();
21603 }
21604 
21605 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21606   SDValue N0 = N->getOperand(0);
21607   EVT VT = N0.getValueType();
21608   unsigned Opcode = N->getOpcode();
21609 
21610   // VECREDUCE over 1-element vector is just an extract.
21611   if (VT.getVectorElementCount().isScalar()) {
21612     SDLoc dl(N);
21613     SDValue Res =
21614         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21615                     DAG.getVectorIdxConstant(0, dl));
21616     if (Res.getValueType() != N->getValueType(0))
21617       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21618     return Res;
21619   }
21620 
  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
21623   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21624     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21625         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21626     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21627         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21628         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21629       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21630   }
21631 
21632   return SDValue();
21633 }
21634 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle
21636 /// with the destination vector and a zero vector.
21637 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21638 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
21639 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21640   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
21641 
21642   EVT VT = N->getValueType(0);
21643   SDValue LHS = N->getOperand(0);
21644   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21645   SDLoc DL(N);
21646 
21647   // Make sure we're not running after operation legalization where it
21648   // may have custom lowered the vector shuffles.
21649   if (LegalOperations)
21650     return SDValue();
21651 
21652   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21653     return SDValue();
21654 
21655   EVT RVT = RHS.getValueType();
21656   unsigned NumElts = RHS.getNumOperands();
21657 
  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is all zeros or all ones -
  // suitable for shuffle masking.
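  // For example (little-endian), with RHS = <i32 0x0000FFFF, i32 0xFFFF0000>
  // no full-width clear mask exists (Split = 1 fails), but at Split = 2 each
  // i16 sub element is all zeros or all ones, giving ClearVT = v4i16 and
  // Indices = <0, 5, 6, 3> (indices >= 4 select from the zero vector).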
21661   auto BuildClearMask = [&](int Split) {
21662     int NumSubElts = NumElts * Split;
21663     int NumSubBits = RVT.getScalarSizeInBits() / Split;
21664 
21665     SmallVector<int, 8> Indices;
21666     for (int i = 0; i != NumSubElts; ++i) {
21667       int EltIdx = i / Split;
21668       int SubIdx = i % Split;
21669       SDValue Elt = RHS.getOperand(EltIdx);
21670       // X & undef --> 0 (not undef). So this lane must be converted to choose
21671       // from the zero constant vector (same as if the element had all 0-bits).
21672       if (Elt.isUndef()) {
21673         Indices.push_back(i + NumSubElts);
21674         continue;
21675       }
21676 
21677       APInt Bits;
21678       if (isa<ConstantSDNode>(Elt))
21679         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21680       else if (isa<ConstantFPSDNode>(Elt))
21681         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21682       else
21683         return SDValue();
21684 
21685       // Extract the sub element from the constant bit mask.
21686       if (DAG.getDataLayout().isBigEndian())
21687         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21688       else
21689         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21690 
21691       if (Bits.isAllOnesValue())
21692         Indices.push_back(i);
21693       else if (Bits == 0)
21694         Indices.push_back(i + NumSubElts);
21695       else
21696         return SDValue();
21697     }
21698 
21699     // Let's see if the target supports this vector_shuffle.
21700     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21701     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21702     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21703       return SDValue();
21704 
21705     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21706     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21707                                                    DAG.getBitcast(ClearVT, LHS),
21708                                                    Zero, Indices));
21709   };
21710 
21711   // Determine maximum split level (byte level masking).
21712   int MaxSplit = 1;
21713   if (RVT.getScalarSizeInBits() % 8 == 0)
21714     MaxSplit = RVT.getScalarSizeInBits() / 8;
21715 
21716   for (int Split = 1; Split <= MaxSplit; ++Split)
21717     if (RVT.getScalarSizeInBits() % Split == 0)
21718       if (SDValue S = BuildClearMask(Split))
21719         return S;
21720 
21721   return SDValue();
21722 }
21723 
21724 /// If a vector binop is performed on splat values, it may be profitable to
21725 /// extract, scalarize, and insert/splat.
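/// For example, with v4f32 operands:
///   fadd (splat X), (splat Y) --> splat (fadd X, Y)
/// trading a vector op for a scalar op plus a splat.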
21726 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21727   SDValue N0 = N->getOperand(0);
21728   SDValue N1 = N->getOperand(1);
21729   unsigned Opcode = N->getOpcode();
21730   EVT VT = N->getValueType(0);
21731   EVT EltVT = VT.getVectorElementType();
21732   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21733 
21734   // TODO: Remove/replace the extract cost check? If the elements are available
21735   //       as scalars, then there may be no extract cost. Should we ask if
21736   //       inserting a scalar back into a vector is cheap instead?
21737   int Index0, Index1;
21738   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21739   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21740   if (!Src0 || !Src1 || Index0 != Index1 ||
21741       Src0.getValueType().getVectorElementType() != EltVT ||
21742       Src1.getValueType().getVectorElementType() != EltVT ||
21743       !TLI.isExtractVecEltCheap(VT, Index0) ||
21744       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
21745     return SDValue();
21746 
21747   SDLoc DL(N);
21748   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21749   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21750   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21751   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21752 
21753   // If all lanes but 1 are undefined, no need to splat the scalar result.
21754   // TODO: Keep track of undefs and use that info in the general case.
21755   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21756       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21757       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21758     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21759     // build_vec ..undef, (bo X, Y), undef...
21760     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21761     Ops[Index0] = ScalarBO;
21762     return DAG.getBuildVector(VT, DL, Ops);
21763   }
21764 
21765   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21766   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21767   return DAG.getBuildVector(VT, DL, Ops);
21768 }
21769 
21770 /// Visit a binary vector operation, like ADD.
21771 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
21772   assert(N->getValueType(0).isVector() &&
21773          "SimplifyVBinOp only works on vectors!");
21774 
21775   SDValue LHS = N->getOperand(0);
21776   SDValue RHS = N->getOperand(1);
21777   SDValue Ops[] = {LHS, RHS};
21778   EVT VT = N->getValueType(0);
21779   unsigned Opcode = N->getOpcode();
21780   SDNodeFlags Flags = N->getFlags();
21781 
21782   // See if we can constant fold the vector operation.
21783   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
21784           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
21785     return Fold;
21786 
21787   // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
21789   //   --> shuffle (VBinOp A, B), Undef, Mask
21790   // This does not require type legality checks because we are creating the
21791   // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
21793   // though. This code is adapted from the identical transform in instcombine.
21794   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
21795       Opcode != ISD::UREM && Opcode != ISD::SREM &&
21796       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
21797     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
21798     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
21799     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
21800         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
21801         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
21802       SDLoc DL(N);
21803       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
21804                                      RHS.getOperand(0), Flags);
21805       SDValue UndefV = LHS.getOperand(1);
21806       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
21807     }
21808 
21809     // Try to sink a splat shuffle after a binop with a uniform constant.
21810     // This is limited to cases where neither the shuffle nor the constant have
21811     // undefined elements because that could be poison-unsafe or inhibit
21812     // demanded elements analysis. It is further limited to not change a splat
21813     // of an inserted scalar because that may be optimized better by
21814     // load-folding or other target-specific behaviors.
21815     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
21816         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
21817         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21818       // binop (splat X), (splat C) --> splat (binop X, C)
21819       SDLoc DL(N);
21820       SDValue X = Shuf0->getOperand(0);
21821       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
21822       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21823                                   Shuf0->getMask());
21824     }
21825     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
21826         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
21827         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21828       // binop (splat C), (splat X) --> splat (binop C, X)
21829       SDLoc DL(N);
21830       SDValue X = Shuf1->getOperand(0);
21831       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
21832       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21833                                   Shuf1->getMask());
21834     }
21835   }
21836 
21837   // The following pattern is likely to emerge with vector reduction ops. Moving
21838   // the binary operation ahead of insertion may allow using a narrower vector
21839   // instruction that has better performance than the wide version of the op:
21840   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
21841   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
21842       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
21843       LHS.getOperand(2) == RHS.getOperand(2) &&
21844       (LHS.hasOneUse() || RHS.hasOneUse())) {
21845     SDValue X = LHS.getOperand(1);
21846     SDValue Y = RHS.getOperand(1);
21847     SDValue Z = LHS.getOperand(2);
21848     EVT NarrowVT = X.getValueType();
21849     if (NarrowVT == Y.getValueType() &&
21850         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
21851                                               LegalOperations)) {
21852       // (binop undef, undef) may not return undef, so compute that result.
21853       SDLoc DL(N);
21854       SDValue VecC =
21855           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
21856       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
21857       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
21858     }
21859   }
21860 
21861   // Make sure all but the first op are undef or constant.
21862   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
21863     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
21864            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
21865              return Op.isUndef() ||
21866                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
21867            });
21868   };
21869 
21870   // The following pattern is likely to emerge with vector reduction ops. Moving
21871   // the binary operation ahead of the concat may allow using a narrower vector
21872   // instruction that has better performance than the wide version of the op:
21873   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
21874   //   concat (VBinOp X, Y), VecC
21875   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
21876       (LHS.hasOneUse() || RHS.hasOneUse())) {
21877     EVT NarrowVT = LHS.getOperand(0).getValueType();
21878     if (NarrowVT == RHS.getOperand(0).getValueType() &&
21879         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
21880       SDLoc DL(N);
21881       unsigned NumOperands = LHS.getNumOperands();
21882       SmallVector<SDValue, 4> ConcatOps;
21883       for (unsigned i = 0; i != NumOperands; ++i) {
        // This will constant fold for operands 1 and up.
21885         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
21886                                         RHS.getOperand(i)));
21887       }
21888 
21889       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21890     }
21891   }
21892 
21893   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
21894     return V;
21895 
21896   return SDValue();
21897 }
21898 
21899 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
21900                                     SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");
21902 
21903   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
21904                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
21905 
21906   // If we got a simplified select_cc node back from SimplifySelectCC, then
21907   // break it down into a new SETCC node, and a new SELECT node, and then return
21908   // the SELECT node, since we were called with a SELECT node.
21909   if (SCC.getNode()) {
21910     // Check to see if we got a select_cc back (to turn into setcc/select).
21911     // Otherwise, just return whatever node we got back, like fabs.
21912     if (SCC.getOpcode() == ISD::SELECT_CC) {
21913       const SDNodeFlags Flags = N0.getNode()->getFlags();
21914       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
21915                                   N0.getValueType(),
21916                                   SCC.getOperand(0), SCC.getOperand(1),
21917                                   SCC.getOperand(4), Flags);
21918       AddToWorklist(SETCC.getNode());
21919       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
21920                                          SCC.getOperand(2), SCC.getOperand(3));
21921       SelectNode->setFlags(Flags);
21922       return SelectNode;
21923     }
21924 
21925     return SCC;
21926   }
21927   return SDValue();
21928 }
21929 
21930 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
21931 /// being selected between, see if we can simplify the select.  Callers of this
21932 /// should assume that TheSelect is deleted if this returns true.  As such, they
21933 /// should return the appropriate thing (e.g. the node) back to the top-level of
21934 /// the DAG combiner loop to avoid it being looked at.
21935 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
21936                                     SDValue RHS) {
21937   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21938   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
21939   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
21940     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
21941       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
21942       SDValue Sqrt = RHS;
21943       ISD::CondCode CC;
21944       SDValue CmpLHS;
21945       const ConstantFPSDNode *Zero = nullptr;
21946 
21947       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
21948         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
21949         CmpLHS = TheSelect->getOperand(0);
21950         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
21951       } else {
21952         // SELECT or VSELECT
21953         SDValue Cmp = TheSelect->getOperand(0);
21954         if (Cmp.getOpcode() == ISD::SETCC) {
21955           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
21956           CmpLHS = Cmp.getOperand(0);
21957           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
21958         }
21959       }
21960       if (Zero && Zero->isZero() &&
21961           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
21962           CC == ISD::SETULT || CC == ISD::SETLT)) {
21963         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21964         CombineTo(TheSelect, Sqrt);
21965         return true;
21966       }
21967     }
21968   }
21969   // Cannot simplify select with vector condition
21970   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
21971 
21972   // If this is a select from two identical things, try to pull the operation
21973   // through the select.
21974   if (LHS.getOpcode() != RHS.getOpcode() ||
21975       !LHS.hasOneUse() || !RHS.hasOneUse())
21976     return false;
21977 
21978   // If this is a load and the token chain is identical, replace the select
21979   // of two loads with a load through a select of the address to load from.
21980   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
21981   // constants have been dropped into the constant pool.
21982   if (LHS.getOpcode() == ISD::LOAD) {
21983     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21984     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21985 
21986     // Token chains must be identical.
21987     if (LHS.getOperand(0) != RHS.getOperand(0) ||
21988         // Do not let this transformation reduce the number of volatile loads.
21989         // Be conservative for atomics for the moment
21990         // TODO: This does appear to be legal for unordered atomics (see D66309)
21991         !LLD->isSimple() || !RLD->isSimple() ||
21992         // FIXME: If either is a pre/post inc/dec load,
21993         // we'd need to split out the address adjustment.
21994         LLD->isIndexed() || RLD->isIndexed() ||
21995         // If this is an EXTLOAD, the VT's must match.
21996         LLD->getMemoryVT() != RLD->getMemoryVT() ||
21997         // If this is an EXTLOAD, the kind of extension must match.
21998         (LLD->getExtensionType() != RLD->getExtensionType() &&
21999          // The only exception is if one of the extensions is anyext.
22000          LLD->getExtensionType() != ISD::EXTLOAD &&
22001          RLD->getExtensionType() != ISD::EXTLOAD) ||
22002         // FIXME: this discards src value information.  This is
22003         // over-conservative. It would be beneficial to be able to remember
22004         // both potential memory locations.  Since we are discarding
22005         // src value info, don't do the transformation if the memory
22006         // locations are not in the default address space.
22007         LLD->getPointerInfo().getAddrSpace() != 0 ||
22008         RLD->getPointerInfo().getAddrSpace() != 0 ||
22009         // We can't produce a CMOV of a TargetFrameIndex since we won't
22010         // generate the address generation required.
22011         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22012         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
22013         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
22014                                       LLD->getBasePtr().getValueType()))
22015       return false;
22016 
22017     // The loads must not depend on one another.
22018     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
22019       return false;
22020 
22021     // Check that the select condition doesn't reach either load.  If so,
22022     // folding this will induce a cycle into the DAG.  If not, this is safe to
22023     // xform, so create a select of the addresses.
22024 
22025     SmallPtrSet<const SDNode *, 32> Visited;
22026     SmallVector<const SDNode *, 16> Worklist;
22027 
    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all nodes in question, so we need not search past it.
22030 
22031     Visited.insert(TheSelect);
22032     Worklist.push_back(LLD);
22033     Worklist.push_back(RLD);
22034 
22035     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
22036         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
22037       return false;
22038 
22039     SDValue Addr;
22040     if (TheSelect->getOpcode() == ISD::SELECT) {
22041       // We cannot do this optimization if any pair of {RLD, LLD} is a
22042       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
22043       // Loads, we only need to check if CondNode is a successor to one of the
22044       // loads. We can further avoid this if there's no use of their chain
22045       // value.
22046       SDNode *CondNode = TheSelect->getOperand(0).getNode();
22047       Worklist.push_back(CondNode);
22048 
22049       if ((LLD->hasAnyUseOfValue(1) &&
22050            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22051           (RLD->hasAnyUseOfValue(1) &&
22052            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22053         return false;
22054 
22055       Addr = DAG.getSelect(SDLoc(TheSelect),
22056                            LLD->getBasePtr().getValueType(),
22057                            TheSelect->getOperand(0), LLD->getBasePtr(),
22058                            RLD->getBasePtr());
22059     } else {  // Otherwise SELECT_CC
22060       // We cannot do this optimization if any pair of {RLD, LLD} is a
22061       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
22062       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
22063       // one of the loads. We can further avoid this if there's no use of their
22064       // chain value.
22065 
22066       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
22067       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
22068       Worklist.push_back(CondLHS);
22069       Worklist.push_back(CondRHS);
22070 
22071       if ((LLD->hasAnyUseOfValue(1) &&
22072            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
22073           (RLD->hasAnyUseOfValue(1) &&
22074            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
22075         return false;
22076 
22077       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
22078                          LLD->getBasePtr().getValueType(),
22079                          TheSelect->getOperand(0),
22080                          TheSelect->getOperand(1),
22081                          LLD->getBasePtr(), RLD->getBasePtr(),
22082                          TheSelect->getOperand(4));
22083     }
22084 
22085     SDValue Load;
22086     // It is safe to replace the two loads if they have different alignments,
22087     // but the new load must be the minimum (most restrictive) alignment of the
22088     // inputs.
22089     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
22090     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
22091     if (!RLD->isInvariant())
22092       MMOFlags &= ~MachineMemOperand::MOInvariant;
22093     if (!RLD->isDereferenceable())
22094       MMOFlags &= ~MachineMemOperand::MODereferenceable;
22095     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
22096       // FIXME: Discards pointer and AA info.
22097       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
22098                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
22099                          MMOFlags);
22100     } else {
22101       // FIXME: Discards pointer and AA info.
22102       Load = DAG.getExtLoad(
22103           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
22104                                                   : LLD->getExtensionType(),
22105           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
22106           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
22107     }
22108 
22109     // Users of the select now use the result of the load.
22110     CombineTo(TheSelect, Load);
22111 
22112     // Users of the old loads now use the new load's chain.  We know the
22113     // old-load value is dead now.
22114     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
22115     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
22116     return true;
22117   }
22118 
22119   return false;
22120 }
22121 
22122 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
22123 /// bitwise 'and'.
22124 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
22125                                             SDValue N1, SDValue N2, SDValue N3,
22126                                             ISD::CondCode CC) {
22127   // If this is a select where the false operand is zero and the compare is a
22128   // check of the sign bit, see if we can perform the "gzip trick":
22129   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
22130   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
22131   EVT XType = N0.getValueType();
22132   EVT AType = N2.getValueType();
22133   if (!isNullConstant(N3) || !XType.bitsGE(AType))
22134     return SDValue();
22135 
22136   // If the comparison is testing for a positive value, we have to invert
22137   // the sign bit mask, so only do that transform if the target has a bitwise
22138   // 'and not' instruction (the invert is free).
22139   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
22140     // (X > -1) ? A : 0
22141     // (X >  0) ? X : 0 <-- This is canonical signed max.
22142     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
22143       return SDValue();
22144   } else if (CC == ISD::SETLT) {
22145     // (X <  0) ? A : 0
22146     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
22147     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
22148       return SDValue();
22149   } else {
22150     return SDValue();
22151   }
22152 
22153   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
22154   // constant.
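  // For example, for i32 and A = 4 (bit 2 set):
  //   select_cc setlt X, 0, 4, 0 --> and (srl X, 29), 4
  // since the logical shift moves the sign bit down into bit 2.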
22155   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
22156   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22157   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
22158     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
22159     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
22160       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22161       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
22162       AddToWorklist(Shift.getNode());
22163 
22164       if (XType.bitsGT(AType)) {
22165         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22166         AddToWorklist(Shift.getNode());
22167       }
22168 
22169       if (CC == ISD::SETGT)
22170         Shift = DAG.getNOT(DL, Shift, AType);
22171 
22172       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22173     }
22174   }
22175 
22176   unsigned ShCt = XType.getSizeInBits() - 1;
22177   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
22178     return SDValue();
22179 
22180   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
22181   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
22182   AddToWorklist(Shift.getNode());
22183 
22184   if (XType.bitsGT(AType)) {
22185     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
22186     AddToWorklist(Shift.getNode());
22187   }
22188 
22189   if (CC == ISD::SETGT)
22190     Shift = DAG.getNOT(DL, Shift, AType);
22191 
22192   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
22193 }
22194 
22195 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
22196 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
22197   SDValue N0 = N->getOperand(0);
22198   EVT VT = N->getValueType(0);
22199   bool IsFabs = N->getOpcode() == ISD::FABS;
22200   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
22201 
22202   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
22203     return SDValue();
22204 
22205   SDValue Int = N0.getOperand(0);
22206   EVT IntVT = Int.getValueType();
22207 
22208   // The operand to cast should be integer.
22209   if (!IntVT.isInteger() || IntVT.isVector())
22210     return SDValue();
22211 
22212   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
22213   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
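  // For example, for a scalar f32 bitcast from an i32 value x:
  //   (fneg (bitconvert x)) -> (bitconvert (xor x, 0x80000000))
  //   (fabs (bitconvert x)) -> (bitconvert (and x, 0x7fffffff))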
22214   APInt SignMask;
22215   if (N0.getValueType().isVector()) {
22216     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
22217     // 0x7f...) per element and splat it.
22218     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
22219     if (IsFabs)
22220       SignMask = ~SignMask;
22221     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
22222   } else {
22223     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
22224     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
22225     if (IsFabs)
22226       SignMask = ~SignMask;
22227   }
22228   SDLoc DL(N0);
22229   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
22230                     DAG.getConstant(SignMask, DL, IntVT));
22231   AddToWorklist(Int.getNode());
22232   return DAG.getBitcast(VT, Int);
22233 }
22234 
22235 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
22236 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
22237 /// in it. This may be a win when the constant is not otherwise available
22238 /// because it replaces two constant pool loads with one.
22239 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
22240     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
22241     ISD::CondCode CC) {
22242   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
22243     return SDValue();
22244 
22245   // If we are before legalize types, we want the other legalization to happen
22246   // first (for example, to avoid messing with soft float).
22247   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
22248   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
22249   EVT VT = N2.getValueType();
22250   if (!TV || !FV || !TLI.isTypeLegal(VT))
22251     return SDValue();
22252 
22253   // If a constant can be materialized without loads, this does not make sense.
22254   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
22255       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
22256       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
22257     return SDValue();
22258 
22259   // If both constants have multiple uses, then we won't need to do an extra
22260   // load. The values are likely around in registers for other users.
22261   if (!TV->hasOneUse() && !FV->hasOneUse())
22262     return SDValue();
22263 
22264   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
22265                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
22266   Type *FPTy = Elts[0]->getType();
22267   const DataLayout &TD = DAG.getDataLayout();
22268 
22269   // Create a ConstantArray of the two constants.
22270   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
22271   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
22272                                       TD.getPrefTypeAlign(FPTy));
22273   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
22274 
22275   // Get offsets to the 0 and 1 elements of the array, so we can select between
22276   // them.
22277   SDValue Zero = DAG.getIntPtrConstant(0, DL);
22278   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
22279   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
22280   SDValue Cond =
22281       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
22282   AddToWorklist(Cond.getNode());
22283   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
22284   AddToWorklist(CstOffset.getNode());
22285   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
22286   AddToWorklist(CPIdx.getNode());
22287   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
22288                      MachinePointerInfo::getConstantPool(
22289                          DAG.getMachineFunction()), Alignment);
22290 }
22291 
22292 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
22293 /// where 'cond' is the comparison specified by CC.
22294 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
22295                                       SDValue N2, SDValue N3, ISD::CondCode CC,
22296                                       bool NotExtCompare) {
22297   // (x ? y : y) -> y.
22298   if (N2 == N3) return N2;
22299 
22300   EVT CmpOpVT = N0.getValueType();
22301   EVT CmpResVT = getSetCCResultType(CmpOpVT);
22302   EVT VT = N2.getValueType();
22303   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
22304   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
22305   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
22306 
22307   // Determine if the condition we're dealing with is constant.
22308   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
22309     AddToWorklist(SCC.getNode());
22310     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
22311       // fold select_cc true, x, y -> x
22312       // fold select_cc false, x, y -> y
      return !SCCC->isNullValue() ? N2 : N3;
22314     }
22315   }
22316 
22317   if (SDValue V =
22318           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
22319     return V;
22320 
22321   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
22322     return V;
22323 
22324   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // In plain words: we can turn the SELECT_CC into an AND when the condition
  // can be materialized as an all-ones register. Any single bit-test can be
  // materialized as an all-ones register with shift-left and
  // shift-right-arith.
22330   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
22331       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
22332     SDValue AndLHS = N0->getOperand(0);
22333     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
22334     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
22335       // Shift the tested bit over the sign bit.
22336       const APInt &AndMask = ConstAndRHS->getAPIntValue();
22337       unsigned ShCt = AndMask.getBitWidth() - 1;
22338       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
22339         SDValue ShlAmt =
22340           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
22341                           getShiftAmountTy(AndLHS.getValueType()));
22342         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
22343 
22344         // Now arithmetic right shift it all the way over, so the result is
22345         // either all-ones, or zero.
22346         SDValue ShrAmt =
22347           DAG.getConstant(ShCt, SDLoc(Shl),
22348                           getShiftAmountTy(Shl.getValueType()));
22349         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
22350 
22351         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
22352       }
22353     }
22354   }
22355 
22356   // fold select C, 16, 0 -> shl C, 4
22357   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
22358   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
22359 
22360   if ((Fold || Swap) &&
22361       TLI.getBooleanContents(CmpOpVT) ==
22362           TargetLowering::ZeroOrOneBooleanContent &&
22363       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
22364 
22365     if (Swap) {
22366       CC = ISD::getSetCCInverse(CC, CmpOpVT);
22367       std::swap(N2C, N3C);
22368     }
22369 
22370     // If the caller doesn't want us to simplify this into a zext of a compare,
22371     // don't do it.
22372     if (NotExtCompare && N2C->isOne())
22373       return SDValue();
22374 
22375     SDValue Temp, SCC;
22376     // zext (setcc n0, n1)
22377     if (LegalTypes) {
22378       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
22379       if (VT.bitsLT(SCC.getValueType()))
22380         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
22381       else
22382         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22383     } else {
22384       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
22385       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
22386     }
22387 
22388     AddToWorklist(SCC.getNode());
22389     AddToWorklist(Temp.getNode());
22390 
22391     if (N2C->isOne())
22392       return Temp;
22393 
22394     unsigned ShCt = N2C->getAPIntValue().logBase2();
22395     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
22396       return SDValue();
22397 
22398     // shl setcc result by log2 n2c
22399     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
22400                        DAG.getConstant(ShCt, SDLoc(Temp),
22401                                        getShiftAmountTy(Temp.getValueType())));
22402   }
22403 
22404   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
22405   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
22406   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
22407   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
22408   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
22409   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
22410   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
22411   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
22412   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
22413     SDValue ValueOnZero = N2;
22414     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
22416     if (CC == ISD::SETNE)
22417       std::swap(ValueOnZero, Count);
22418     // Check if the value on zero is a constant equal to the bits in the type.
22419     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
22420       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
22421         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
22422         // legal, combine to just cttz.
22423         if ((Count.getOpcode() == ISD::CTTZ ||
22424              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
22425             N0 == Count.getOperand(0) &&
22426             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
22427           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
22428         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
22429         // legal, combine to just ctlz.
22430         if ((Count.getOpcode() == ISD::CTLZ ||
22431              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
22432             N0 == Count.getOperand(0) &&
22433             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
22434           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
22435       }
22436     }
22437   }
22438 
22439   return SDValue();
22440 }
22441 
22442 /// This is a stub for TargetLowering::SimplifySetCC.
22443 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
22444                                    ISD::CondCode Cond, const SDLoc &DL,
22445                                    bool foldBooleans) {
22446   TargetLowering::DAGCombinerInfo
22447     DagCombineInfo(DAG, Level, false, this);
22448   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
22449 }
22450 
22451 /// Given an ISD::SDIV node expressing a divide by constant, return
22452 /// a DAG expression to select that will generate the same value by multiplying
22453 /// by a magic number.
22454 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
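/// The exact node sequence is chosen by TLI.BuildSDIV below; as an
/// illustration, a classical i32 expansion of X sdiv 7 (constants from
/// Hacker's Delight) looks like:
///   Q = mulhs X, 0x92492493  ; high half of the 64-bit product
///   Q = add Q, X             ; correction for the negative magic constant
///   Q = sra Q, 2             ; divide by the remaining power of two
///   Q = add Q, (srl X, 31)   ; add one for negative X to round toward zero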
22455 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
22458   if (DAG.getMachineFunction().getFunction().hasMinSize())
22459     return SDValue();
22460 
22461   SmallVector<SDNode *, 8> Built;
22462   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22463     for (SDNode *N : Built)
22464       AddToWorklist(N);
22465     return S;
22466   }
22467 
22468   return SDValue();
22469 }
22470 
22471 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22472 /// DAG expression that will generate the same value by right shifting.
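/// The target hook below decides the final form; as an illustration, a
/// typical i32 expansion of X sdiv 4 is:
///   T = srl (sra X, 31), 30    ; 3 if X is negative, else 0
///   Q = sra (add X, T), 2      ; the bias makes the shift round toward zero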
22473 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22474   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22475   if (!C)
22476     return SDValue();
22477 
22478   // Avoid division by zero.
22479   if (C->isNullValue())
22480     return SDValue();
22481 
22482   SmallVector<SDNode *, 8> Built;
22483   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22484     for (SDNode *N : Built)
22485       AddToWorklist(N);
22486     return S;
22487   }
22488 
22489   return SDValue();
22490 }
22491 
22492 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22493 /// expression that will generate the same value by multiplying by a magic
22494 /// number.
22495 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
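/// As above, the sequence comes from TLI.BuildUDIV; as an illustration, a
/// classical i32 expansion of X udiv 3 is simply:
///   Q = srl (mulhu X, 0xAAAAAAAB), 1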
22496 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
22499   if (DAG.getMachineFunction().getFunction().hasMinSize())
22500     return SDValue();
22501 
22502   SmallVector<SDNode *, 8> Built;
22503   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22504     for (SDNode *N : Built)
22505       AddToWorklist(N);
22506     return S;
22507   }
22508 
22509   return SDValue();
22510 }
22511 
/// Determines the LogBase2 value for a non-zero input value using the
22513 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
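/// For example, for i32 V = 16: ctlz(16) = 27, and (32 - 1) - 27 = 4.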
22514 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22515   EVT VT = V.getValueType();
22516   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22517   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22518   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22519   return LogBase2;
22520 }
22521 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
22523 /// For the reciprocal, we need to find the zero of the function:
22524 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
22525 ///     =>
22526 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22527 ///     does not require additional intermediate precision]
22528 /// For the last iteration, put numerator N into it to gain more precision:
22529 ///   Result = N X_i + X_i (N - N A X_i)
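/// (Expanding that last form: N X_i + X_i (N - N A X_i) = N X_i (2 - A X_i),
/// i.e. the regular iteration with the numerator N folded in.)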
22530 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22531                                       SDNodeFlags Flags) {
22532   if (LegalDAG)
22533     return SDValue();
22534 
22535   // TODO: Handle half and/or extended types?
22536   EVT VT = Op.getValueType();
22537   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22538     return SDValue();
22539 
22540   // If estimates are explicitly disabled for this function, we're done.
22541   MachineFunction &MF = DAG.getMachineFunction();
22542   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22543   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22544     return SDValue();
22545 
22546   // Estimates may be explicitly enabled for this type with a custom number of
22547   // refinement steps.
22548   int Iterations = TLI.getDivRefinementSteps(VT, MF);
22549   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22550     AddToWorklist(Est.getNode());
22551 
22552     SDLoc DL(Op);
22553     if (Iterations) {
22554       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22555 
22556       // Newton iterations: Est = Est + Est (N - Arg * Est)
22557       // If this is the last iteration, also multiply by the numerator.
22558       for (int i = 0; i < Iterations; ++i) {
22559         SDValue MulEst = Est;
22560 
22561         if (i == Iterations - 1) {
22562           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22563           AddToWorklist(MulEst.getNode());
22564         }
22565 
22566         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22567         AddToWorklist(NewEst.getNode());
22568 
22569         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22570                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22571         AddToWorklist(NewEst.getNode());
22572 
22573         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22574         AddToWorklist(NewEst.getNode());
22575 
22576         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22577         AddToWorklist(Est.getNode());
22578       }
22579     } else {
22580       // If no iterations are available, multiply with N.
22581       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22582       AddToWorklist(Est.getNode());
22583     }
22584 
22585     return Est;
22586   }
22587 
22588   return SDValue();
22589 }
22590 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
22592 /// For the reciprocal sqrt, we need to find the zero of the function:
22593 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22594 ///     =>
22595 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22596 /// As a result, we precompute A/2 prior to the iteration loop.
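/// (Derivation: F'(X) = -2 / X^3, so
///   X_{i+1} = X_i - F(X_i)/F'(X_i) = X_i + (X_i - A X_i^3) / 2
///           = X_i (1.5 - A X_i^2 / 2).)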
22597 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22598                                          unsigned Iterations,
22599                                          SDNodeFlags Flags, bool Reciprocal) {
22600   EVT VT = Arg.getValueType();
22601   SDLoc DL(Arg);
22602   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22603 
22604   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22605   // this entire sequence requires only one FP constant.
22606   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22607   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22608 
22609   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22610   for (unsigned i = 0; i < Iterations; ++i) {
22611     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22612     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22613     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22614     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22615   }
22616 
22617   // If non-reciprocal square root is requested, multiply the result by Arg.
22618   if (!Reciprocal)
22619     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22620 
22621   return Est;
22622 }
22623 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
22625 /// For the reciprocal sqrt, we need to find the zero of the function:
22626 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22627 ///     =>
22628 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
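/// (This is algebraically the same step as the one-constant form, since
/// (-0.5 * X_i) * (A * X_i * X_i - 3.0) = X_i (1.5 - A X_i^2 / 2); it is
/// rewritten so that only the constants -0.5 and -3.0 are required.)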
22629 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22630                                          unsigned Iterations,
22631                                          SDNodeFlags Flags, bool Reciprocal) {
22632   EVT VT = Arg.getValueType();
22633   SDLoc DL(Arg);
22634   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22635   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22636 
22637   // This routine must enter the loop below to work correctly
22638   // when (Reciprocal == false).
  assert(Iterations > 0 && "Expected at least one refinement iteration");
22640 
22641   // Newton iterations for reciprocal square root:
22642   // E = (E * -0.5) * ((A * E) * E + -3.0)
22643   for (unsigned i = 0; i < Iterations; ++i) {
22644     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22645     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22646     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22647 
22648     // When calculating a square root at the last iteration build:
22649     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22650     // (notice a common subexpression)
22651     SDValue LHS;
22652     if (Reciprocal || (i + 1) < Iterations) {
22653       // RSQRT: LHS = (E * -0.5)
22654       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22655     } else {
22656       // SQRT: LHS = (A * E) * -0.5
22657       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22658     }
22659 
22660     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22661   }
22662 
22663   return Est;
22664 }
22665 
22666 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22667 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22668 /// Op can be zero.
22669 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22670                                            bool Reciprocal) {
22671   if (LegalDAG)
22672     return SDValue();
22673 
22674   // TODO: Handle half and/or extended types?
22675   EVT VT = Op.getValueType();
22676   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22677     return SDValue();
22678 
22679   // If estimates are explicitly disabled for this function, we're done.
22680   MachineFunction &MF = DAG.getMachineFunction();
22681   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22682   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22683     return SDValue();
22684 
22685   // Estimates may be explicitly enabled for this type with a custom number of
22686   // refinement steps.
22687   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22688 
22689   bool UseOneConstNR = false;
22690   if (SDValue Est =
22691       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22692                           Reciprocal)) {
22693     AddToWorklist(Est.getNode());
22694 
22695     if (Iterations)
22696       Est = UseOneConstNR
22697             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22698             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22699     if (!Reciprocal) {
22700       SDLoc DL(Op);
22701       // Try the target specific test first.
22702       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
22703 
      // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or the value provided by
      // the target for those cases.
22707       Est = DAG.getNode(
22708           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22709           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22710     }
22711     return Est;
22712   }
22713 
22714   return SDValue();
22715 }
22716 
22717 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22718   return buildSqrtEstimateImpl(Op, Flags, true);
22719 }
22720 
22721 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22722   return buildSqrtEstimateImpl(Op, Flags, false);
22723 }
22724 
22725 /// Return true if there is any possibility that the two addresses overlap.
22726 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
22727 
22728   struct MemUseCharacteristics {
22729     bool IsVolatile;
22730     bool IsAtomic;
22731     SDValue BasePtr;
22732     int64_t Offset;
22733     Optional<int64_t> NumBytes;
22734     MachineMemOperand *MMO;
22735   };
22736 
22737   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
22738     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
22739       int64_t Offset = 0;
22740       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
22741         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
22742                      ? C->getSExtValue()
22743                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
22744                            ? -1 * C->getSExtValue()
22745                            : 0;
22746       uint64_t Size =
22747           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22748       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22749               Offset /*base offset*/,
22750               Optional<int64_t>(Size),
22751               LSN->getMemOperand()};
22752     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
22754       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
22755               (LN->hasOffset()) ? LN->getOffset() : 0,
22756               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22757                                 : Optional<int64_t>(),
22758               (MachineMemOperand *)nullptr};
22759     // Default.
22760     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
22761             (int64_t)0 /*offset*/,
22762             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
22763   };
22764 
22765   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22766                         MUC1 = getCharacteristics(Op1);
22767 
22768   // If they are to the same address, then they must be aliases.
22769   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22770       MUC0.Offset == MUC1.Offset)
22771     return true;
22772 
22773   // If they are both volatile then they cannot be reordered.
22774   if (MUC0.IsVolatile && MUC1.IsVolatile)
22775     return true;
22776 
22777   // Be conservative about atomics for the moment
22778   // TODO: This is way overconservative for unordered atomics (see D66309)
22779   if (MUC0.IsAtomic && MUC1.IsAtomic)
22780     return true;
22781 
22782   if (MUC0.MMO && MUC1.MMO) {
22783     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22784         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22785       return false;
22786   }
22787 
22788   // Try to prove that there is aliasing, or that there is no aliasing. Either
22789   // way, we can return now. If nothing can be proved, proceed with more tests.
22790   bool IsAlias;
22791   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
22792                                        DAG, IsAlias))
22793     return IsAlias;
22794 
22795   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
22796   // either are not known.
22797   if (!MUC0.MMO || !MUC1.MMO)
22798     return true;
22799 
22800   // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
22803   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22804       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22805     return false;
22806 
  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types; it only works when the offsets
  // are multiples of the size of the data.
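  // For example, two 4-byte accesses with base alignment 8 at offsets 0 and 4
  // pass these checks: OffAlign0 = 0, OffAlign1 = 4, and 0 + 4 <= 4, so no
  // overlap is possible.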
22812   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
22813   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
22814   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
22815   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
22816   auto &Size0 = MUC0.NumBytes;
22817   auto &Size1 = MUC1.NumBytes;
22818   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
22819       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
22820       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
22821       SrcValOffset1 % *Size1 == 0) {
22822     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
22823     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
22824 
22825     // There is no overlap between these relatively aligned accesses of
22826     // similar size. Return no alias.
22827     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
22828       return false;
22829   }
22830 
22831   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
22832                    ? CombinerGlobalAA
22833                    : DAG.getSubtarget().useAA();
22834 #ifndef NDEBUG
22835   if (CombinerAAOnlyFunc.getNumOccurrences() &&
22836       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
22837     UseAA = false;
22838 #endif
22839 
22840   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
22841       Size0.hasValue() && Size1.hasValue()) {
22842     // Use alias analysis information.
22843     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
22844     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
22845     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
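    // The Overlap sizes extend each location so that, measured from the
    // smaller of the two starting offsets, it covers through the end of its
    // own access: e.g., 4- and 8-byte accesses at source offsets 16 and 8
    // give MinOffset = 8, Overlap0 = 4 + 16 - 8 = 12, Overlap1 = 8 + 8 - 8 = 8.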
22846     if (AA->isNoAlias(
22847             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
22848                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
22849             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
22850                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
22851       return false;
22852   }
22853 
22854   // Otherwise we have to assume they alias.
22855   return true;
22856 }
22857 
22858 /// Walk up chain skipping non-aliasing memory nodes,
22859 /// looking for aliasing nodes and adding them to the Aliases vector.
22860 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
22861                                    SmallVectorImpl<SDValue> &Aliases) {
22862   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
22863   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
22864 
22865   // Get alias information for node.
22866   // TODO: relax aliasing for unordered atomics (see D66309)
22867   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
22868 
22869   // Starting off.
22870   Chains.push_back(OriginalChain);
22871   unsigned Depth = 0;
22872 
  // Attempt to improve the chain by a single step.
22874   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
22875     switch (C.getOpcode()) {
22876     case ISD::EntryToken:
22877       // No need to mark EntryToken.
22878       C = SDValue();
22879       return true;
22880     case ISD::LOAD:
22881     case ISD::STORE: {
22882       // Get alias information for C.
22883       // TODO: Relax aliasing for unordered atomics (see D66309)
22884       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
22885                       cast<LSBaseSDNode>(C.getNode())->isSimple();
22886       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
22887         // Look further up the chain.
22888         C = C.getOperand(0);
22889         return true;
22890       }
22891       // Alias, so stop here.
22892       return false;
22893     }
22894 
22895     case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
22897       C = C.getOperand(0);
22898       return true;
22899 
22900     case ISD::LIFETIME_START:
22901     case ISD::LIFETIME_END: {
22902       // We can forward past any lifetime start/end that can be proven not to
22903       // alias the memory access.
22904       if (!isAlias(N, C.getNode())) {
22905         // Look further up the chain.
22906         C = C.getOperand(0);
22907         return true;
22908       }
22909       return false;
22910     }
22911     default:
22912       return false;
22913     }
22914   };
22915 
22916   // Look at each chain and determine if it is an alias.  If so, add it to the
22917   // aliases list.  If not, then continue up the chain looking for the next
22918   // candidate.
22919   while (!Chains.empty()) {
22920     SDValue Chain = Chains.pop_back_val();
22921 
22922     // Don't bother if we've seen Chain before.
22923     if (!Visited.insert(Chain.getNode()).second)
22924       continue;
22925 
22926     // For TokenFactor nodes, look at each operand and only continue up the
22927     // chain until we reach the depth limit.
22928     //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a TokenFactor rather than the original
    // chain.
22932     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
22933       Aliases.clear();
22934       Aliases.push_back(OriginalChain);
22935       return;
22936     }
22937 
22938     if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order, since the last
      // operand pushed is the first popped; this increases the likelihood that
      // getNode will find a matching token factor (CSE).
22943       if (Chain.getNumOperands() > 16) {
22944         Aliases.push_back(Chain);
22945         continue;
22946       }
22947       for (unsigned n = Chain.getNumOperands(); n;)
22948         Chains.push_back(Chain.getOperand(--n));
22949       ++Depth;
22950       continue;
22951     }
22952     // Everything else
22953     if (ImproveChain(Chain)) {
      // An improved chain was found; consider the new chain if one exists.
22955       if (Chain.getNode())
22956         Chains.push_back(Chain);
22957       ++Depth;
22958       continue;
22959     }
    // No improved chain is possible; treat this chain as an alias.
22961     Aliases.push_back(Chain);
22962   }
22963 }
22964 
/// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
22967 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
22968   if (OptLevel == CodeGenOpt::None)
22969     return OldChain;
22970 
22971   // Ops for replacing token factor.
22972   SmallVector<SDValue, 8> Aliases;
22973 
22974   // Accumulate all the aliases to this node.
22975   GatherAllAliases(N, OldChain, Aliases);
22976 
  // If there are no operands, chain to the entry token.
  if (Aliases.empty())
22979     return DAG.getEntryNode();
22980 
  // If there is a single operand, chain to it. We don't need to revisit it.
22982   if (Aliases.size() == 1)
22983     return Aliases[0];
22984 
22985   // Construct a custom tailored token factor.
22986   return DAG.getTokenFactor(SDLoc(N), Aliases);
22987 }
22988 
22989 namespace {
// TODO: Replace with std::monostate when we move to C++17.
22991 struct UnitT { } Unit;
22992 bool operator==(const UnitT &, const UnitT &) { return true; }
22993 bool operator!=(const UnitT &, const UnitT &) { return false; }
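// With a value type whose instances always compare equal, adjacent intervals
// in an IntervalMap coalesce automatically, so the map below behaves like an
// interval set.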
22994 } // namespace
22995 
22996 // This function tries to collect a bunch of potentially interesting
22997 // nodes to improve the chains of, all at once. This might seem
22998 // redundant, as this function gets called when visiting every store
22999 // node, so why not let the work be done on each store as it's visited?
23000 //
23001 // I believe this is mainly important because mergeConsecutiveStores
23002 // is unable to deal with merging stores of different sizes, so unless
23003 // we improve the chains of all the potential candidates up-front
23004 // before running mergeConsecutiveStores, it might only see some of
23005 // the nodes that will eventually be candidates, and then not be able
23006 // to go from a partially-merged state to the desired final
23007 // fully-merged state.
23008 
23009 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
23010   SmallVector<StoreSDNode *, 8> ChainedStores;
23011   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, each newly visited store writes immediately below the
  // range covered so far, so its interval is merged with the existing
  // interval at insertion time.
23015 
23016   using IMap =
23017       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
23018   IMap::Allocator A;
23019   IMap Intervals(A);
23020 
23021   // This holds the base pointer, index, and the offset in bytes from the base
23022   // pointer.
23023   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23024 
23025   // We must have a base and an offset.
23026   if (!BasePtr.getBase().getNode())
23027     return false;
23028 
23029   // Do not handle stores to undef base pointers.
23030   if (BasePtr.getBase().isUndef())
23031     return false;
23032 
23033   // BaseIndexOffset assumes that offsets are fixed-size, which
23034   // is not valid for scalable vectors where the offsets are
23035   // scaled by `vscale`, so bail out early.
23036   if (St->getMemoryVT().isScalableVector())
23037     return false;
23038 
23039   // Add ST's interval.
23040   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
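
  // For example, the initial i32 store covers the half-open interval [0, 4);
  // a chained i32 store at offset -4 then inserts [-4, 0), which coalesces
  // with [0, 4) into [-4, 4) since the intervals are adjacent and carry the
  // same unit value.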
23041 
23042   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
23043     if (Chain->getMemoryVT().isScalableVector())
23044       return false;
23045 
23046     // If the chain has more than one use, then we can't reorder the mem ops.
23047     if (!SDValue(Chain, 0)->hasOneUse())
23048       break;
23049     // TODO: Relax for unordered atomics (see D66309)
23050     if (!Chain->isSimple() || Chain->isIndexed())
23051       break;
23052 
23053     // Find the base pointer and offset for this memory node.
23054     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
23055     // Check that the base pointer is the same as the original one.
23056     int64_t Offset;
23057     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
23058       break;
23059     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
23060     // Make sure we don't overlap with other intervals by checking the ones to
23061     // the left or right before inserting.
23062     auto I = Intervals.find(Offset);
23063     // If there's a next interval, we should end before it.
23064     if (I != Intervals.end() && I.start() < (Offset + Length))
23065       break;
23066     // If there's a previous interval, we should start after it.
23067     if (I != Intervals.begin() && (--I).stop() <= Offset)
23068       break;
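    // Both checks above are conservative: if disjointness from the already
    // covered ranges cannot be shown, stop growing the group rather than risk
    // reordering overlapping stores.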
23069     Intervals.insert(Offset, Offset + Length, Unit);
23070 
23071     ChainedStores.push_back(Chain);
23072     STChain = Chain;
23073   }
23074 
  // If we didn't find a chained store, exit.
  if (ChainedStores.empty())
23077     return false;
23078 
  // Improve all chained stores (St and the members of ChainedStores) starting
  // from where the store chain ended, and combine them with a single
  // TokenFactor.
23081   SDValue NewChain = STChain->getChain();
23082   SmallVector<SDValue, 8> TFOps;
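  // Re-chain each store in the group directly to (an improvement of) NewChain
  // rather than to the store below it, making the stores independent of one
  // another; the TokenFactor built below joins their chains back together for
  // St's users.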
23083   for (unsigned I = ChainedStores.size(); I;) {
23084     StoreSDNode *S = ChainedStores[--I];
23085     SDValue BetterChain = FindBetterChain(S, NewChain);
23086     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
23087         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
23088     TFOps.push_back(SDValue(S, 0));
23089     ChainedStores[I] = S;
23090   }
23091 
23092   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
23093   SDValue BetterChain = FindBetterChain(St, NewChain);
23094   SDValue NewST;
23095   if (St->isTruncatingStore())
23096     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
23097                               St->getBasePtr(), St->getMemoryVT(),
23098                               St->getMemOperand());
23099   else
23100     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
23101                          St->getBasePtr(), St->getMemOperand());
23102 
23103   TFOps.push_back(NewST);
23104 
  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep the relative order consistent with FindBetterChain.
23108   auto hasImprovedChain = [&](SDValue ST) -> bool {
23109     return ST->getOperand(0) != NewChain;
23110   };
23111   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
23112   if (AddNewChain)
23113     TFOps.insert(TFOps.begin(), NewChain);
23114 
23115   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
23116   CombineTo(St, TF);
23117 
23118   // Add TF and its operands to the worklist.
23119   AddToWorklist(TF.getNode());
23120   for (const SDValue &Op : TF->ops())
23121     AddToWorklist(Op.getNode());
23122   AddToWorklist(STChain);
23123   return true;
23124 }
23125 
23126 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
23127   if (OptLevel == CodeGenOpt::None)
23128     return false;
23129 
23130   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
23131 
23132   // We must have a base and an offset.
23133   if (!BasePtr.getBase().getNode())
23134     return false;
23135 
23136   // Do not handle stores to undef base pointers.
23137   if (BasePtr.getBase().isUndef())
23138     return false;
23139 
23140   // Directly improve a chain of disjoint stores starting at St.
23141   if (parallelizeChainedStores(St))
23142     return true;
23143 
  // Improve St's chain.
23145   SDValue BetterChain = FindBetterChain(St, St->getChain());
23146   if (St->getChain() != BetterChain) {
23147     replaceStoreChain(St, BetterChain);
23148     return true;
23149   }
23150   return false;
23151 }
23152 
23153 /// This is the entry point for the file.
23154 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
23155                            CodeGenOpt::Level OptLevel) {
  // This is the main entry point to this class.
23157   DAGCombiner(*this, AA, OptLevel).Run(Level);
23158 }
23159