//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store sequences narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("Enable DAG combiner's merging of multiple "
                                "stores into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));
static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times the same StoreNode and RootNode "
             "pair may bail out in the store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("Enable DAG combiner's reduction of the width of load/op/store "
             "sequences"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("Enable DAG combiner's replacement of load/<replace bytes>/store "
             "with a narrower store"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level;
    CodeGenOpt::Level OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;
    /// This records all nodes attempted to be added to the worklist since we
    /// last considered a new worklist entry. Because we do not add duplicate
    /// nodes to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in the dependence check. If the same
    /// pair has bailed out more times than the limit, we won't consider the
    /// StoreNode with the same RootNode as a store merging candidate again.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the worklist because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }
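
    // Illustrative sketch (not part of the pass): because deleted nodes are
    // nulled out rather than erased, the worklist can interleave live and
    // dead slots, e.g.
    //   AddToWorklist(A); AddToWorklist(B);  // Worklist = [A, B]
    //   removeFromWorklist(A);               // Worklist = [null, B]
    //   getNextWorklistEntry();              // pops B; the next call skips
    //                                        // the null slot and returns
    //                                        // nullptr once drained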

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()),
          Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }
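
    // For example (illustrative only): if Op is (and X, 0xFF) and X is
    // already known to produce only low-byte bits, the call above can prove
    // the mask redundant; TLO then records the replacement of Op by X and
    // CommitTargetLoweringOpt rewires all uses.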

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnesValue(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }
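
    // For example (illustrative only): for a v4i32 Op whose users only read
    // lane 0, calling the two-argument overload below with
    // DemandedElts = 0b0001 lets the target drop or simplify the computation
    // of the three unused lanes.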

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1,
                                    SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitADDCARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                              SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitSUBCARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitFMINIMUM(SDNode *N);
    SDValue visitFMAXIMUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL, SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }
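
    // For instance (illustrative only): a store of (Constant 42) classifies
    // as StoreSource::Constant, while a store of (extract_vector_elt V, 0)
    // classifies as StoreSource::Extract; the store merging helpers below
    // only merge runs of candidate stores that share one classification.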

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use); if they are not met, an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }
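
    // Example usage (illustrative only): a combine that wants to form a
    // funnel shift can ask hasOperation(ISD::FSHL, VT); before operation
    // legalization this accepts Legal or Custom lowerings, afterwards only
    // fully Legal ones, so a single query works at every combine level.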

  public:
    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match. We provide an
// Offset so that we can create bitwidths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}
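
// For example (illustrative only): with an 8-bit LHS, a 16-bit RHS and
// Offset = 1, both values are zero-extended to 17 bits, leaving one bit of
// headroom so that, e.g., adding the pair afterwards cannot overflow.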

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
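
// For example (illustrative only): with 1/0 boolean contents,
//   (select_cc x, y, 1, 0, setlt)
// is equivalent to (setcc x, y, setlt); the function hands back LHS = x,
// RHS = y and CC = setlt so callers can treat both forms uniformly.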

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}
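
// For example (illustrative only): a v4i32 build vector splatting 0xFFFF
// matches ScalarTy == MVT::i16, since every lane carries exactly the i16
// all-ones mask; a splat of 0xFF00 matches no supported scalar type.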

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine whether the index of an indexed load may be split off: splitting
// must be allowed by the MaySplitLoadIndex option, and the index must not be
// an opaque target constant.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  if (N0.hasOneUse())
    return false;

  auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C1 || !C2)
    return false;

  const APInt &C1APIntVal = C1->getAPIntValue();
  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
    return false;

  const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
  if (CombinedValueIntVal.getBitWidth() > 64)
    return false;
  const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

  for (SDNode *Node : N0->uses()) {
    auto LoadStore = dyn_cast<MemSDNode>(Node);
    if (LoadStore) {
      // Is x[offset2] already not a legal addressing mode? If so then
      // reassociating the constants breaks nothing (we test offset2 because
      // that's the one we hope to fold into the load or store).
      TargetLoweringBase::AddrMode AM;
      AM.HasBaseReg = true;
      AM.BaseOffs = C2APIntVal.getSExtValue();
      EVT VT = LoadStore->getMemoryVT();
      unsigned AS = LoadStore->getAddressSpace();
      Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        continue;

      // Would x[offset1+offset2] still be a legal addressing mode?
      AM.BaseOffs = CombinedValue;
      if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
        return true;
    }
  }

  return false;
}
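
// For example (illustrative only): suppose CodeGenPrepare split a large GEP
// offset so an access is addressed as (add (add x, 0x100000), 4), and the
// target accepts [reg + 4] but not [reg + 0x100004] as an addressing mode.
// Folding the constants back together would undo that split, so the function
// above returns true and the caller skips the reassociation.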

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode =
              DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      //              iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}
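
// For example (illustrative only), with Opc == ISD::ADD:
//   (add (add x, 10), 32) --> (add x, 42)          // both constants fold
//   (add (add x, 10), y)  --> (add (add x, y), 10) // if (add x, 10) has
//                                                  // one use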
1092 
1093 // Try to reassociate commutative binops.
1094 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1095                                     SDValue N1, SDNodeFlags Flags) {
1096   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1097 
1098   // Floating-point reassociation is not allowed without loose FP math.
1099   if (N0.getValueType().isFloatingPoint() ||
1100       N1.getValueType().isFloatingPoint())
1101     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1102       return SDValue();
1103 
1104   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1105     return Combined;
1106   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1107     return Combined;
1108   return SDValue();
1109 }
1110 
1111 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1112                                bool AddTo) {
1113   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1114   ++NodesCombined;
1115   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1116              To[0].getNode()->dump(&DAG);
1117              dbgs() << " and " << NumTo - 1 << " other values\n");
1118   for (unsigned i = 0, e = NumTo; i != e; ++i)
1119     assert((!To[i].getNode() ||
1120             N->getValueType(i) == To[i].getValueType()) &&
1121            "Cannot combine value to value of different type!");
1122 
1123   WorklistRemover DeadNodes(*this);
1124   DAG.ReplaceAllUsesWith(N, To);
1125   if (AddTo) {
1126     // Push the new nodes and any users onto the worklist
1127     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1128       if (To[i].getNode()) {
1129         AddToWorklist(To[i].getNode());
1130         AddUsersToWorklist(To[i].getNode());
1131       }
1132     }
1133   }
1134 
1135   // Finally, if the node is now dead, remove it from the graph.  The node
1136   // may not be dead if the replacement process recursively simplified to
1137   // something else needing this node.
1138   if (N->use_empty())
1139     deleteAndRecombine(N);
1140   return SDValue(N, 0);
1141 }
1142 
1143 void DAGCombiner::
1144 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1145   // Replace the old value with the new one.
1146   ++NodesCombined;
1147   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1148              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1149              dbgs() << '\n');
1150 
1151   // Replace all uses.  If any nodes become isomorphic to other nodes and
1152   // are deleted, make sure to remove them from our worklist.
1153   WorklistRemover DeadNodes(*this);
1154   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1155 
1156   // Push the new node and any (possibly new) users onto the worklist.
1157   AddToWorklistWithUsers(TLO.New.getNode());
1158 
1159   // Finally, if the node is now dead, remove it from the graph.  The node
1160   // may not be dead if the replacement process recursively simplified to
1161   // something else needing this node.
1162   if (TLO.Old.getNode()->use_empty())
1163     deleteAndRecombine(TLO.Old.getNode());
1164 }
1165 
1166 /// Check the specified integer node value to see if it can be simplified or if
1167 /// things it uses can be simplified by bit propagation. If so, return true.
1168 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1169                                        const APInt &DemandedElts,
1170                                        bool AssumeSingleUse) {
1171   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1172   KnownBits Known;
1173   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1174                                 AssumeSingleUse))
1175     return false;
1176 
1177   // Revisit the node.
1178   AddToWorklist(Op.getNode());
1179 
1180   CommitTargetLoweringOpt(TLO);
1181   return true;
1182 }
1183 
1184 /// Check the specified vector node value to see if it can be simplified or
1185 /// if things it uses can be simplified as it only uses some of the elements.
1186 /// If so, return true.
1187 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1188                                              const APInt &DemandedElts,
1189                                              bool AssumeSingleUse) {
1190   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1191   APInt KnownUndef, KnownZero;
1192   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1193                                       TLO, 0, AssumeSingleUse))
1194     return false;
1195 
1196   // Revisit the node.
1197   AddToWorklist(Op.getNode());
1198 
1199   CommitTargetLoweringOpt(TLO);
1200   return true;
1201 }
1202 
1203 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1204   SDLoc DL(Load);
1205   EVT VT = Load->getValueType(0);
1206   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1207 
1208   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1209              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1210   WorklistRemover DeadNodes(*this);
1211   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1212   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1213   deleteAndRecombine(Load);
1214   AddToWorklist(Trunc.getNode());
1215 }
1216 
1217 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1218   Replace = false;
1219   SDLoc DL(Op);
1220   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1221     LoadSDNode *LD = cast<LoadSDNode>(Op);
1222     EVT MemVT = LD->getMemoryVT();
1223     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1224                                                       : LD->getExtensionType();
1225     Replace = true;
1226     return DAG.getExtLoad(ExtType, DL, PVT,
1227                           LD->getChain(), LD->getBasePtr(),
1228                           MemVT, LD->getMemOperand());
1229   }
1230 
1231   unsigned Opc = Op.getOpcode();
1232   switch (Opc) {
1233   default: break;
1234   case ISD::AssertSext:
1235     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1236       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1237     break;
1238   case ISD::AssertZext:
1239     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1240       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1241     break;
1242   case ISD::Constant: {
1243     unsigned ExtOpc =
1244       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1245     return DAG.getNode(ExtOpc, DL, PVT, Op);
1246   }
1247   }
1248 
1249   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1250     return SDValue();
1251   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1252 }
1253 
1254 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1255   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1256     return SDValue();
1257   EVT OldVT = Op.getValueType();
1258   SDLoc DL(Op);
1259   bool Replace = false;
1260   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1261   if (!NewOp.getNode())
1262     return SDValue();
1263   AddToWorklist(NewOp.getNode());
1264 
1265   if (Replace)
1266     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1267   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1268                      DAG.getValueType(OldVT));
1269 }
1270 
1271 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1272   EVT OldVT = Op.getValueType();
1273   SDLoc DL(Op);
1274   bool Replace = false;
1275   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1276   if (!NewOp.getNode())
1277     return SDValue();
1278   AddToWorklist(NewOp.getNode());
1279 
1280   if (Replace)
1281     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1282   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1283 }
1284 
1285 /// Promote the specified integer binary operation if the target indicates it is
1286 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1287 /// i32 since i16 instructions are longer.
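/// e.g. (i16 (add x, y)) is rewritten as
/// (i16 (trunc (i32 (add (anyext x), (anyext y))))).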
1288 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1289   if (!LegalOperations)
1290     return SDValue();
1291 
1292   EVT VT = Op.getValueType();
1293   if (VT.isVector() || !VT.isInteger())
1294     return SDValue();
1295 
1296   // If operation type is 'undesirable', e.g. i16 on x86, consider
1297   // promoting it.
1298   unsigned Opc = Op.getOpcode();
1299   if (TLI.isTypeDesirableForOp(Opc, VT))
1300     return SDValue();
1301 
1302   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1305   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1306     assert(PVT != VT && "Don't know what type to promote to!");
1307 
1308     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1309 
1310     bool Replace0 = false;
1311     SDValue N0 = Op.getOperand(0);
1312     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1313 
1314     bool Replace1 = false;
1315     SDValue N1 = Op.getOperand(1);
1316     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1317     SDLoc DL(Op);
1318 
1319     SDValue RV =
1320         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1321 
1322     // We are always replacing N0/N1's use in N and only need additional
1323     // replacements if there are additional uses.
1324     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1325     //       (SDValue) here because the node may reference multiple values
1326     //       (for example, the chain value of a load node).
1327     Replace0 &= !N0->hasOneUse();
1328     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1329 
1330     // Combine Op here so it is preserved past replacements.
1331     CombineTo(Op.getNode(), RV);
1332 
    // If the operands have a use ordering, make sure we deal with the
    // predecessor first.
1335     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1336       std::swap(N0, N1);
1337       std::swap(NN0, NN1);
1338     }
1339 
1340     if (Replace0) {
1341       AddToWorklist(NN0.getNode());
1342       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1343     }
1344     if (Replace1) {
1345       AddToWorklist(NN1.getNode());
1346       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1347     }
1348     return Op;
1349   }
1350   return SDValue();
1351 }
1352 
1353 /// Promote the specified integer shift operation if the target indicates it is
1354 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1355 /// i32 since i16 instructions are longer.
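/// The bits of the shifted operand above the original type must be well
/// defined, so SRA sign-extends it and SRL zero-extends it, e.g.
/// (i16 (srl x, c)) is rewritten as (i16 (trunc (i32 (srl (zext x), c)))).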
1356 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1357   if (!LegalOperations)
1358     return SDValue();
1359 
1360   EVT VT = Op.getValueType();
1361   if (VT.isVector() || !VT.isInteger())
1362     return SDValue();
1363 
1364   // If operation type is 'undesirable', e.g. i16 on x86, consider
1365   // promoting it.
1366   unsigned Opc = Op.getOpcode();
1367   if (TLI.isTypeDesirableForOp(Opc, VT))
1368     return SDValue();
1369 
1370   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1373   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1374     assert(PVT != VT && "Don't know what type to promote to!");
1375 
1376     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1377 
1378     bool Replace = false;
1379     SDValue N0 = Op.getOperand(0);
1380     SDValue N1 = Op.getOperand(1);
1381     if (Opc == ISD::SRA)
1382       N0 = SExtPromoteOperand(N0, PVT);
1383     else if (Opc == ISD::SRL)
1384       N0 = ZExtPromoteOperand(N0, PVT);
1385     else
1386       N0 = PromoteOperand(N0, PVT, Replace);
1387 
1388     if (!N0.getNode())
1389       return SDValue();
1390 
1391     SDLoc DL(Op);
1392     SDValue RV =
1393         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1394 
1395     if (Replace)
1396       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1397 
1398     // Deal with Op being deleted.
1399     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1400       return RV;
1401   }
1402   return SDValue();
1403 }
1404 
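/// Promote the specified integer extension if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.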
1405 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1406   if (!LegalOperations)
1407     return SDValue();
1408 
1409   EVT VT = Op.getValueType();
1410   if (VT.isVector() || !VT.isInteger())
1411     return SDValue();
1412 
1413   // If operation type is 'undesirable', e.g. i16 on x86, consider
1414   // promoting it.
1415   unsigned Opc = Op.getOpcode();
1416   if (TLI.isTypeDesirableForOp(Opc, VT))
1417     return SDValue();
1418 
1419   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1422   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1423     assert(PVT != VT && "Don't know what type to promote to!");
1424     // fold (aext (aext x)) -> (aext x)
1425     // fold (aext (zext x)) -> (zext x)
1426     // fold (aext (sext x)) -> (sext x)
1427     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1428     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1429   }
1430   return SDValue();
1431 }
1432 
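/// Promote the specified integer load if the target indicates it is
/// beneficial: rebuild it as an extending load of the promoted type and
/// replace all uses of the original value with a truncation of the wide load.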
1433 bool DAGCombiner::PromoteLoad(SDValue Op) {
1434   if (!LegalOperations)
1435     return false;
1436 
1437   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1438     return false;
1439 
1440   EVT VT = Op.getValueType();
1441   if (VT.isVector() || !VT.isInteger())
1442     return false;
1443 
1444   // If operation type is 'undesirable', e.g. i16 on x86, consider
1445   // promoting it.
1446   unsigned Opc = Op.getOpcode();
1447   if (TLI.isTypeDesirableForOp(Opc, VT))
1448     return false;
1449 
1450   EVT PVT = VT;
  // Consult the target on whether it is a good idea to promote this operation
  // and what type to promote it to.
1453   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1454     assert(PVT != VT && "Don't know what type to promote to!");
1455 
1456     SDLoc DL(Op);
1457     SDNode *N = Op.getNode();
1458     LoadSDNode *LD = cast<LoadSDNode>(N);
1459     EVT MemVT = LD->getMemoryVT();
1460     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1461                                                       : LD->getExtensionType();
1462     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1463                                    LD->getChain(), LD->getBasePtr(),
1464                                    MemVT, LD->getMemOperand());
1465     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1466 
1467     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1468                Result.getNode()->dump(&DAG); dbgs() << '\n');
1469     WorklistRemover DeadNodes(*this);
1470     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1471     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1472     deleteAndRecombine(N);
1473     AddToWorklist(Result.getNode());
1474     return true;
1475   }
1476   return false;
1477 }
1478 
1479 /// Recursively delete a node which has no uses and any operands for
1480 /// which it is the only use.
1481 ///
1482 /// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1485 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1486   if (!N->use_empty())
1487     return false;
1488 
1489   SmallSetVector<SDNode *, 16> Nodes;
1490   Nodes.insert(N);
1491   do {
1492     N = Nodes.pop_back_val();
1493     if (!N)
1494       continue;
1495 
1496     if (N->use_empty()) {
1497       for (const SDValue &ChildN : N->op_values())
1498         Nodes.insert(ChildN.getNode());
1499 
1500       removeFromWorklist(N);
1501       DAG.DeleteNode(N);
1502     } else {
1503       AddToWorklist(N);
1504     }
1505   } while (!Nodes.empty());
1506   return true;
1507 }
1508 
1509 //===----------------------------------------------------------------------===//
1510 //  Main DAG Combiner implementation
1511 //===----------------------------------------------------------------------===//
1512 
1513 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
1515   Level = AtLevel;
1516   LegalDAG = Level >= AfterLegalizeDAG;
1517   LegalOperations = Level >= AfterLegalizeVectorOps;
1518   LegalTypes = Level >= AfterLegalizeTypes;
1519 
1520   WorklistInserter AddNodes(*this);
1521 
1522   // Add all the dag nodes to the worklist.
1523   for (SDNode &Node : DAG.allnodes())
1524     AddToWorklist(&Node);
1525 
  // Create a dummy node (which is not added to allnodes) that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
1529   HandleSDNode Dummy(DAG.getRoot());
1530 
1531   // While we have a valid worklist entry node, try to combine it.
1532   while (SDNode *N = getNextWorklistEntry()) {
1533     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1534     // N is deleted from the DAG, since they too may now be dead or may have a
1535     // reduced number of uses, allowing other xforms.
1536     if (recursivelyDeleteUnusedNodes(N))
1537       continue;
1538 
1539     WorklistRemover DeadNodes(*this);
1540 
1541     // If this combine is running after legalizing the DAG, re-legalize any
1542     // nodes pulled off the worklist.
1543     if (LegalDAG) {
1544       SmallSetVector<SDNode *, 16> UpdatedNodes;
1545       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1546 
1547       for (SDNode *LN : UpdatedNodes)
1548         AddToWorklistWithUsers(LN);
1549 
1550       if (!NIsValid)
1551         continue;
1552     }
1553 
1554     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1555 
1556     // Add any operands of the new node which have not yet been combined to the
1557     // worklist as well. Because the worklist uniques things already, this
1558     // won't repeatedly process the same operand.
1559     CombinedNodes.insert(N);
1560     for (const SDValue &ChildN : N->op_values())
1561       if (!CombinedNodes.count(ChildN.getNode()))
1562         AddToWorklist(ChildN.getNode());
1563 
1564     SDValue RV = combine(N);
1565 
1566     if (!RV.getNode())
1567       continue;
1568 
1569     ++NodesCombined;
1570 
1571     // If we get back the same node we passed in, rather than a new node or
1572     // zero, we know that the node must have defined multiple values and
1573     // CombineTo was used.  Since CombineTo takes care of the worklist
1574     // mechanics for us, we have no work to do in this case.
1575     if (RV.getNode() == N)
1576       continue;
1577 
1578     assert(N->getOpcode() != ISD::DELETED_NODE &&
1579            RV.getOpcode() != ISD::DELETED_NODE &&
1580            "Node was deleted but visit returned new node!");
1581 
1582     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1583 
1584     if (N->getNumValues() == RV.getNode()->getNumValues())
1585       DAG.ReplaceAllUsesWith(N, RV.getNode());
1586     else {
1587       assert(N->getValueType(0) == RV.getValueType() &&
1588              N->getNumValues() == 1 && "Type mismatch");
1589       DAG.ReplaceAllUsesWith(N, &RV);
1590     }
1591 
1592     // Push the new node and any users onto the worklist.  Omit this if the
1593     // new node is the EntryToken (e.g. if a store managed to get optimized
1594     // out), because re-visiting the EntryToken and its users will not uncover
1595     // any additional opportunities, but there may be a large number of such
1596     // users, potentially causing compile time explosion.
1597     if (RV.getOpcode() != ISD::EntryToken) {
1598       AddToWorklist(RV.getNode());
1599       AddUsersToWorklist(RV.getNode());
1600     }
1601 
1602     // Finally, if the node is now dead, remove it from the graph.  The node
1603     // may not be dead if the replacement process recursively simplified to
1604     // something else needing this node. This will also take care of adding any
1605     // operands which have lost a user to the worklist.
1606     recursivelyDeleteUnusedNodes(N);
1607   }
1608 
  // If the root changed (e.g. it was a dead load), update the root.
1610   DAG.setRoot(Dummy.getValue());
1611   DAG.RemoveDeadNodes();
1612 }
1613 
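/// Dispatch \p N to the visit routine matching its opcode. Returns a
/// replacement value if a combine applied, or a null SDValue otherwise.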
1614 SDValue DAGCombiner::visit(SDNode *N) {
1615   switch (N->getOpcode()) {
1616   default: break;
1617   case ISD::TokenFactor:        return visitTokenFactor(N);
1618   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1619   case ISD::ADD:                return visitADD(N);
1620   case ISD::SUB:                return visitSUB(N);
1621   case ISD::SADDSAT:
1622   case ISD::UADDSAT:            return visitADDSAT(N);
1623   case ISD::SSUBSAT:
1624   case ISD::USUBSAT:            return visitSUBSAT(N);
1625   case ISD::ADDC:               return visitADDC(N);
1626   case ISD::SADDO:
1627   case ISD::UADDO:              return visitADDO(N);
1628   case ISD::SUBC:               return visitSUBC(N);
1629   case ISD::SSUBO:
1630   case ISD::USUBO:              return visitSUBO(N);
1631   case ISD::ADDE:               return visitADDE(N);
1632   case ISD::ADDCARRY:           return visitADDCARRY(N);
1633   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1634   case ISD::SUBE:               return visitSUBE(N);
1635   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1636   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1637   case ISD::SMULFIX:
1638   case ISD::SMULFIXSAT:
1639   case ISD::UMULFIX:
1640   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1641   case ISD::MUL:                return visitMUL(N);
1642   case ISD::SDIV:               return visitSDIV(N);
1643   case ISD::UDIV:               return visitUDIV(N);
1644   case ISD::SREM:
1645   case ISD::UREM:               return visitREM(N);
1646   case ISD::MULHU:              return visitMULHU(N);
1647   case ISD::MULHS:              return visitMULHS(N);
1648   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1649   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1650   case ISD::SMULO:
1651   case ISD::UMULO:              return visitMULO(N);
1652   case ISD::SMIN:
1653   case ISD::SMAX:
1654   case ISD::UMIN:
1655   case ISD::UMAX:               return visitIMINMAX(N);
1656   case ISD::AND:                return visitAND(N);
1657   case ISD::OR:                 return visitOR(N);
1658   case ISD::XOR:                return visitXOR(N);
1659   case ISD::SHL:                return visitSHL(N);
1660   case ISD::SRA:                return visitSRA(N);
1661   case ISD::SRL:                return visitSRL(N);
1662   case ISD::ROTR:
1663   case ISD::ROTL:               return visitRotate(N);
1664   case ISD::FSHL:
1665   case ISD::FSHR:               return visitFunnelShift(N);
1666   case ISD::ABS:                return visitABS(N);
1667   case ISD::BSWAP:              return visitBSWAP(N);
1668   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1669   case ISD::CTLZ:               return visitCTLZ(N);
1670   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1671   case ISD::CTTZ:               return visitCTTZ(N);
1672   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1673   case ISD::CTPOP:              return visitCTPOP(N);
1674   case ISD::SELECT:             return visitSELECT(N);
1675   case ISD::VSELECT:            return visitVSELECT(N);
1676   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1677   case ISD::SETCC:              return visitSETCC(N);
1678   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1679   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1680   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1681   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1682   case ISD::AssertSext:
1683   case ISD::AssertZext:         return visitAssertExt(N);
1684   case ISD::AssertAlign:        return visitAssertAlign(N);
1685   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1686   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1687   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1688   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1689   case ISD::BITCAST:            return visitBITCAST(N);
1690   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1691   case ISD::FADD:               return visitFADD(N);
1692   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
1693   case ISD::FSUB:               return visitFSUB(N);
1694   case ISD::FMUL:               return visitFMUL(N);
1695   case ISD::FMA:                return visitFMA(N);
1696   case ISD::FDIV:               return visitFDIV(N);
1697   case ISD::FREM:               return visitFREM(N);
1698   case ISD::FSQRT:              return visitFSQRT(N);
1699   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1700   case ISD::FPOW:               return visitFPOW(N);
1701   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1702   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1703   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1704   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1705   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1706   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1707   case ISD::FNEG:               return visitFNEG(N);
1708   case ISD::FABS:               return visitFABS(N);
1709   case ISD::FFLOOR:             return visitFFLOOR(N);
1710   case ISD::FMINNUM:            return visitFMINNUM(N);
1711   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1712   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1713   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1714   case ISD::FCEIL:              return visitFCEIL(N);
1715   case ISD::FTRUNC:             return visitFTRUNC(N);
1716   case ISD::BRCOND:             return visitBRCOND(N);
1717   case ISD::BR_CC:              return visitBR_CC(N);
1718   case ISD::LOAD:               return visitLOAD(N);
1719   case ISD::STORE:              return visitSTORE(N);
1720   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1721   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1722   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1723   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1724   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1725   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1726   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1727   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1728   case ISD::MGATHER:            return visitMGATHER(N);
1729   case ISD::MLOAD:              return visitMLOAD(N);
1730   case ISD::MSCATTER:           return visitMSCATTER(N);
1731   case ISD::MSTORE:             return visitMSTORE(N);
1732   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1733   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1734   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1735   case ISD::FREEZE:             return visitFREEZE(N);
1736   case ISD::VECREDUCE_FADD:
1737   case ISD::VECREDUCE_FMUL:
1738   case ISD::VECREDUCE_ADD:
1739   case ISD::VECREDUCE_MUL:
1740   case ISD::VECREDUCE_AND:
1741   case ISD::VECREDUCE_OR:
1742   case ISD::VECREDUCE_XOR:
1743   case ISD::VECREDUCE_SMAX:
1744   case ISD::VECREDUCE_SMIN:
1745   case ISD::VECREDUCE_UMAX:
1746   case ISD::VECREDUCE_UMIN:
1747   case ISD::VECREDUCE_FMAX:
1748   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1749   }
1750   return SDValue();
1751 }
1752 
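/// Try to combine \p N: first with the generic visit routines, then with any
/// target-specific combines, then by promoting the operation to a wider
/// integer type, and finally by CSE'ing against a commuted form of a
/// commutative binary node.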
1753 SDValue DAGCombiner::combine(SDNode *N) {
1754   SDValue RV;
1755   if (!DisableGenericCombines)
1756     RV = visit(N);
1757 
1758   // If nothing happened, try a target-specific DAG combine.
1759   if (!RV.getNode()) {
1760     assert(N->getOpcode() != ISD::DELETED_NODE &&
1761            "Node was deleted but visit returned NULL!");
1762 
1763     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1764         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1765 
1766       // Expose the DAG combiner to the target combiner impls.
1767       TargetLowering::DAGCombinerInfo
1768         DagCombineInfo(DAG, Level, false, this);
1769 
1770       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1771     }
1772   }
1773 
  // If still nothing happened, try promoting the operation.
1775   if (!RV.getNode()) {
1776     switch (N->getOpcode()) {
1777     default: break;
1778     case ISD::ADD:
1779     case ISD::SUB:
1780     case ISD::MUL:
1781     case ISD::AND:
1782     case ISD::OR:
1783     case ISD::XOR:
1784       RV = PromoteIntBinOp(SDValue(N, 0));
1785       break;
1786     case ISD::SHL:
1787     case ISD::SRA:
1788     case ISD::SRL:
1789       RV = PromoteIntShiftOp(SDValue(N, 0));
1790       break;
1791     case ISD::SIGN_EXTEND:
1792     case ISD::ZERO_EXTEND:
1793     case ISD::ANY_EXTEND:
1794       RV = PromoteExtend(SDValue(N, 0));
1795       break;
1796     case ISD::LOAD:
1797       if (PromoteLoad(SDValue(N, 0)))
1798         RV = SDValue(N, 0);
1799       break;
1800     }
1801   }
1802 
1803   // If N is a commutative binary node, try to eliminate it if the commuted
1804   // version is already present in the DAG.
1805   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1806       N->getNumValues() == 1) {
1807     SDValue N0 = N->getOperand(0);
1808     SDValue N1 = N->getOperand(1);
1809 
1810     // Constant operands are canonicalized to RHS.
1811     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1812       SDValue Ops[] = {N1, N0};
1813       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1814                                             N->getFlags());
1815       if (CSENode)
1816         return SDValue(CSENode, 0);
1817     }
1818   }
1819 
1820   return RV;
1821 }
1822 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
1825 static SDValue getInputChainForNode(SDNode *N) {
1826   if (unsigned NumOps = N->getNumOperands()) {
1827     if (N->getOperand(0).getValueType() == MVT::Other)
1828       return N->getOperand(0);
1829     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1830       return N->getOperand(NumOps-1);
1831     for (unsigned i = 1; i < NumOps-1; ++i)
1832       if (N->getOperand(i).getValueType() == MVT::Other)
1833         return N->getOperand(i);
1834   }
1835   return SDValue();
1836 }
1837 
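/// Simplify a TokenFactor by dropping redundant and duplicate operands,
/// inlining single-use TokenFactor operands, and pruning operands that are
/// reachable through another operand's chain.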
1838 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1839   // If N has two operands, where one has an input chain equal to the other,
1840   // the 'other' chain is redundant.
1841   if (N->getNumOperands() == 2) {
1842     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1843       return N->getOperand(0);
1844     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1845       return N->getOperand(1);
1846   }
1847 
1848   // Don't simplify token factors if optnone.
1849   if (OptLevel == CodeGenOpt::None)
1850     return SDValue();
1851 
1852   // Don't simplify the token factor if the node itself has too many operands.
1853   if (N->getNumOperands() > TokenFactorInlineLimit)
1854     return SDValue();
1855 
1856   // If the sole user is a token factor, we should make sure we have a
1857   // chance to merge them together. This prevents TF chains from inhibiting
1858   // optimizations.
1859   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1860     AddToWorklist(*(N->use_begin()));
1861 
1862   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1863   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1864   SmallPtrSet<SDNode*, 16> SeenOps;
1865   bool Changed = false;             // If we should replace this token factor.
1866 
1867   // Start out with this token factor.
1868   TFs.push_back(N);
1869 
  // Iterate through token factors. TFs grows as new token factors are
  // encountered.
1872   for (unsigned i = 0; i < TFs.size(); ++i) {
1873     // Limit number of nodes to inline, to avoid quadratic compile times.
1874     // We have to add the outstanding Token Factors to Ops, otherwise we might
1875     // drop Ops from the resulting Token Factors.
1876     if (Ops.size() > TokenFactorInlineLimit) {
1877       for (unsigned j = i; j < TFs.size(); j++)
1878         Ops.emplace_back(TFs[j], 0);
1879       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1880       // combiner worklist later.
1881       TFs.resize(i);
1882       break;
1883     }
1884 
1885     SDNode *TF = TFs[i];
1886     // Check each of the operands.
1887     for (const SDValue &Op : TF->op_values()) {
1888       switch (Op.getOpcode()) {
1889       case ISD::EntryToken:
1890         // Entry tokens don't need to be added to the list. They are
1891         // redundant.
1892         Changed = true;
1893         break;
1894 
1895       case ISD::TokenFactor:
1896         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1897           // Queue up for processing.
1898           TFs.push_back(Op.getNode());
1899           Changed = true;
1900           break;
1901         }
1902         LLVM_FALLTHROUGH;
1903 
1904       default:
1905         // Only add if it isn't already in the list.
1906         if (SeenOps.insert(Op.getNode()).second)
1907           Ops.push_back(Op);
1908         else
1909           Changed = true;
1910         break;
1911       }
1912     }
1913   }
1914 
1915   // Re-visit inlined Token Factors, to clean them up in case they have been
1916   // removed. Skip the first Token Factor, as this is the current node.
1917   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1918     AddToWorklist(TFs[i]);
1919 
  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.
1925 
1926   // List of nodes to search through and original Ops from which they originate.
1927   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1928   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1929   SmallPtrSet<SDNode *, 16> SeenChains;
1930   bool DidPruneOps = false;
1931 
1932   unsigned NumLeftToConsider = 0;
1933   for (const SDValue &Op : Ops) {
1934     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1935     OpWorkCount.push_back(1);
1936   }
1937 
1938   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as coming from the current OpNumber.
1941     if (SeenOps.count(Op) != 0) {
1942       Changed = true;
1943       DidPruneOps = true;
1944       unsigned OrigOpNumber = 0;
1945       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1946         OrigOpNumber++;
1947       assert((OrigOpNumber != Ops.size()) &&
1948              "expected to find TokenFactor Operand");
1949       // Re-mark worklist from OrigOpNumber to OpNumber
1950       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1951         if (Worklist[i].second == OrigOpNumber) {
1952           Worklist[i].second = OpNumber;
1953         }
1954       }
1955       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1956       OpWorkCount[OrigOpNumber] = 0;
1957       NumLeftToConsider--;
1958     }
1959     // Add if it's a new chain
1960     if (SeenChains.insert(Op).second) {
1961       OpWorkCount[OpNumber]++;
1962       Worklist.push_back(std::make_pair(Op, OpNumber));
1963     }
1964   };
1965 
1966   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We must consider at least 2 Ops to prune.
1968     if (NumLeftToConsider <= 1)
1969       break;
1970     auto CurNode = Worklist[i].first;
1971     auto CurOpNumber = Worklist[i].second;
1972     assert((OpWorkCount[CurOpNumber] > 0) &&
1973            "Node should not appear in worklist");
1974     switch (CurNode->getOpcode()) {
1975     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
1980       NumLeftToConsider++;
1981       break;
1982     case ISD::TokenFactor:
1983       for (const SDValue &Op : CurNode->op_values())
1984         AddToWorklist(i, Op.getNode(), CurOpNumber);
1985       break;
1986     case ISD::LIFETIME_START:
1987     case ISD::LIFETIME_END:
1988     case ISD::CopyFromReg:
1989     case ISD::CopyToReg:
1990       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1991       break;
1992     default:
1993       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1994         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1995       break;
1996     }
1997     OpWorkCount[CurOpNumber]--;
1998     if (OpWorkCount[CurOpNumber] == 0)
1999       NumLeftToConsider--;
2000   }
2001 
2002   // If we've changed things around then replace token factor.
2003   if (Changed) {
2004     SDValue Result;
2005     if (Ops.empty()) {
2006       // The entry token is the only possible outcome.
2007       Result = DAG.getEntryNode();
2008     } else {
2009       if (DidPruneOps) {
2010         SmallVector<SDValue, 8> PrunedOps;
2012         for (const SDValue &Op : Ops) {
2013           if (SeenChains.count(Op.getNode()) == 0)
2014             PrunedOps.push_back(Op);
2015         }
2016         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2017       } else {
2018         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2019       }
2020     }
2021     return Result;
2022   }
2023   return SDValue();
2024 }
2025 
2026 /// MERGE_VALUES can always be eliminated.
2027 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2028   WorklistRemover DeadNodes(*this);
2029   // Replacing results may cause a different MERGE_VALUES to suddenly
2030   // be CSE'd with N, and carry its uses with it. Iterate until no
2031   // uses remain, to ensure that the node can be safely deleted.
2032   // First add the users of this node to the work list so that they
2033   // can be tried again once they have new operands.
2034   AddUsersToWorklist(N);
2035   do {
2036     // Do as a single replacement to avoid rewalking use lists.
2037     SmallVector<SDValue, 8> Ops;
2038     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2039       Ops.push_back(N->getOperand(i));
2040     DAG.ReplaceAllUsesWith(N, Ops.data());
2041   } while (!N->use_empty());
2042   deleteAndRecombine(N);
2043   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2044 }
2045 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
2048 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2049   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2050   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2051 }
2052 
2053 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2054 /// and that N may be folded in the load / store addressing mode.
2055 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2056                                     const TargetLowering &TLI) {
2057   EVT VT;
2058   unsigned AS;
2059 
2060   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2061     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2062       return false;
2063     VT = LD->getMemoryVT();
2064     AS = LD->getAddressSpace();
2065   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2066     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2067       return false;
2068     VT = ST->getMemoryVT();
2069     AS = ST->getAddressSpace();
2070   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2071     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2072       return false;
2073     VT = LD->getMemoryVT();
2074     AS = LD->getAddressSpace();
2075   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2076     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2077       return false;
2078     VT = ST->getMemoryVT();
2079     AS = ST->getAddressSpace();
2080   } else
2081     return false;
2082 
2083   TargetLowering::AddrMode AM;
2084   if (N->getOpcode() == ISD::ADD) {
2085     AM.HasBaseReg = true;
2086     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2087     if (Offset)
2088       // [reg +/- imm]
2089       AM.BaseOffs = Offset->getSExtValue();
2090     else
2091       // [reg +/- reg]
2092       AM.Scale = 1;
2093   } else if (N->getOpcode() == ISD::SUB) {
2094     AM.HasBaseReg = true;
2095     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2096     if (Offset)
2097       // [reg +/- imm]
2098       AM.BaseOffs = -Offset->getSExtValue();
2099     else
2100       // [reg +/- reg]
2101       AM.Scale = 1;
2102   } else
2103     return false;
2104 
2105   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2106                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2107 }
2108 
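/// Try to eliminate a binary operator on a one-use select of constants by
/// pulling the constant math into the select, e.g.:
/// binop (select Cond, CT, CF), CBO --> select Cond, (binop CT, CBO),
///                                                   (binop CF, CBO)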
2109 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2110   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2111          "Unexpected binary operator");
2112 
2113   // Don't do this unless the old select is going away. We want to eliminate the
2114   // binary operator, not replace a binop with a select.
2115   // TODO: Handle ISD::SELECT_CC.
2116   unsigned SelOpNo = 0;
2117   SDValue Sel = BO->getOperand(0);
2118   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2119     SelOpNo = 1;
2120     Sel = BO->getOperand(1);
2121   }
2122 
2123   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2124     return SDValue();
2125 
2126   SDValue CT = Sel.getOperand(1);
2127   if (!isConstantOrConstantVector(CT, true) &&
2128       !isConstantFPBuildVectorOrConstantFP(CT))
2129     return SDValue();
2130 
2131   SDValue CF = Sel.getOperand(2);
2132   if (!isConstantOrConstantVector(CF, true) &&
2133       !isConstantFPBuildVectorOrConstantFP(CF))
2134     return SDValue();
2135 
2136   // Bail out if any constants are opaque because we can't constant fold those.
  // The exception is "and" and "or" with either 0 or -1, in which case we can
  // propagate non-constant operands into the select. I.e.:
2139   // and (select Cond, 0, -1), X --> select Cond, 0, X
2140   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2141   auto BinOpcode = BO->getOpcode();
2142   bool CanFoldNonConst =
2143       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2144       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2145       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2146 
2147   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2148   if (!CanFoldNonConst &&
2149       !isConstantOrConstantVector(CBO, true) &&
2150       !isConstantFPBuildVectorOrConstantFP(CBO))
2151     return SDValue();
2152 
2153   EVT VT = BO->getValueType(0);
2154 
2155   // We have a select-of-constants followed by a binary operator with a
2156   // constant. Eliminate the binop by pulling the constant math into the select.
2157   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2158   SDLoc DL(Sel);
2159   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2160                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2161   if (!CanFoldNonConst && !NewCT.isUndef() &&
2162       !isConstantOrConstantVector(NewCT, true) &&
2163       !isConstantFPBuildVectorOrConstantFP(NewCT))
2164     return SDValue();
2165 
2166   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2167                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2168   if (!CanFoldNonConst && !NewCF.isUndef() &&
2169       !isConstantOrConstantVector(NewCF, true) &&
2170       !isConstantFPBuildVectorOrConstantFP(NewCF))
2171     return SDValue();
2172 
2173   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2174   SelectOp->setFlags(BO->getFlags());
2175   return SelectOp;
2176 }
2177 
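/// Fold an add/sub of a constant and a zero-extended inverted low bit into a
/// sub/add of the low bit with an adjusted constant, e.g.:
/// add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))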
2178 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2179   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2180          "Expecting add or sub");
2181 
2182   // Match a constant operand and a zext operand for the math instruction:
2183   // add Z, C
2184   // sub C, Z
2185   bool IsAdd = N->getOpcode() == ISD::ADD;
2186   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2187   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2188   auto *CN = dyn_cast<ConstantSDNode>(C);
2189   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2190     return SDValue();
2191 
2192   // Match the zext operand as a setcc of a boolean.
2193   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2194       Z.getOperand(0).getValueType() != MVT::i1)
2195     return SDValue();
2196 
2197   // Match the compare as: setcc (X & 1), 0, eq.
2198   SDValue SetCC = Z.getOperand(0);
2199   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2200   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2201       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2202       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2203     return SDValue();
2204 
2205   // We are adding/subtracting a constant and an inverted low bit. Turn that
2206   // into a subtract/add of the low bit with incremented/decremented constant:
2207   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2208   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2209   EVT VT = C.getValueType();
2210   SDLoc DL(N);
2211   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2212   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2213                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2214   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2215 }
2216 
/// Try to fold an add/sub of a constant with the shifted sign bit of a 'not'
/// value into a shift and an add with a different constant.
2219 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2220   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2221          "Expecting add or sub");
2222 
2223   // We need a constant operand for the add/sub, and the other operand is a
2224   // logical shift right: add (srl), C or sub C, (srl).
2225   bool IsAdd = N->getOpcode() == ISD::ADD;
2226   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2227   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2228   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2229       ShiftOp.getOpcode() != ISD::SRL)
2230     return SDValue();
2231 
2232   // The shift must be of a 'not' value.
2233   SDValue Not = ShiftOp.getOperand(0);
2234   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2235     return SDValue();
2236 
2237   // The shift must be moving the sign bit to the least-significant-bit.
2238   EVT VT = ShiftOp.getValueType();
2239   SDValue ShAmt = ShiftOp.getOperand(1);
2240   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2241   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2242     return SDValue();
2243 
2244   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2245   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2246   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2247   SDLoc DL(N);
2248   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2249   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2250   if (SDValue NewC =
2251           DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2252                                      {ConstantOp, DAG.getConstant(1, DL, VT)}))
2253     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2254   return SDValue();
2255 }
2256 
2257 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2258 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2259 /// are no common bits set in the operands).
2260 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2261   SDValue N0 = N->getOperand(0);
2262   SDValue N1 = N->getOperand(1);
2263   EVT VT = N0.getValueType();
2264   SDLoc DL(N);
2265 
2266   // fold vector ops
2267   if (VT.isVector()) {
2268     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2269       return FoldedVOp;
2270 
2271     // fold (add x, 0) -> x, vector edition
2272     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2273       return N0;
2274     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2275       return N1;
2276   }
2277 
2278   // fold (add x, undef) -> undef
2279   if (N0.isUndef())
2280     return N0;
2281 
2282   if (N1.isUndef())
2283     return N1;
2284 
2285   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2286     // canonicalize constant to RHS
2287     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2288       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2289     // fold (add c1, c2) -> c1+c2
2290     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1});
2291   }
2292 
2293   // fold (add x, 0) -> x
2294   if (isNullConstant(N1))
2295     return N0;
2296 
2297   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2298     // fold ((A-c1)+c2) -> (A+(c2-c1))
2299     if (N0.getOpcode() == ISD::SUB &&
2300         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2301       SDValue Sub =
2302           DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)});
2303       assert(Sub && "Constant folding failed");
2304       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2305     }
2306 
2307     // fold ((c1-A)+c2) -> (c1+c2)-A
2308     if (N0.getOpcode() == ISD::SUB &&
2309         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2310       SDValue Add =
2311           DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)});
2312       assert(Add && "Constant folding failed");
2313       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2314     }
2315 
2316     // add (sext i1 X), 1 -> zext (not i1 X)
2317     // We don't transform this pattern:
2318     //   add (zext i1 X), -1 -> sext (not i1 X)
2319     // because most (?) targets generate better code for the zext form.
2320     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2321         isOneOrOneSplat(N1)) {
2322       SDValue X = N0.getOperand(0);
2323       if ((!LegalOperations ||
2324            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2325             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2326           X.getScalarValueSizeInBits() == 1) {
2327         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2328         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2329       }
2330     }
2331 
2332     // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is
2333     // equivalent to (add x, c0).
2334     if (N0.getOpcode() == ISD::OR &&
2335         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) &&
2336         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2337       if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT,
2338                                                     {N1, N0.getOperand(1)}))
2339         return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2340     }
2341   }
2342 
2343   if (SDValue NewSel = foldBinOpIntoSelect(N))
2344     return NewSel;
2345 
2346   // reassociate add
2347   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2348     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2349       return RADD;
2350   }
2351   // fold ((0-A) + B) -> B-A
2352   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2353     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2354 
2355   // fold (A + (0-B)) -> A-B
2356   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2357     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2358 
2359   // fold (A+(B-A)) -> B
2360   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2361     return N1.getOperand(0);
2362 
2363   // fold ((B-A)+A) -> B
2364   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2365     return N0.getOperand(0);
2366 
2367   // fold ((A-B)+(C-A)) -> (C-B)
2368   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2369       N0.getOperand(0) == N1.getOperand(1))
2370     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2371                        N0.getOperand(1));
2372 
2373   // fold ((A-B)+(B-C)) -> (A-C)
2374   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2375       N0.getOperand(1) == N1.getOperand(0))
2376     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2377                        N1.getOperand(1));
2378 
2379   // fold (A+(B-(A+C))) to (B-C)
2380   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2381       N0 == N1.getOperand(1).getOperand(0))
2382     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2383                        N1.getOperand(1).getOperand(1));
2384 
2385   // fold (A+(B-(C+A))) to (B-C)
2386   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2387       N0 == N1.getOperand(1).getOperand(1))
2388     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2389                        N1.getOperand(1).getOperand(0));
2390 
2391   // fold (A+((B-A)+or-C)) to (B+or-C)
2392   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2393       N1.getOperand(0).getOpcode() == ISD::SUB &&
2394       N0 == N1.getOperand(0).getOperand(1))
2395     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2396                        N1.getOperand(1));
2397 
2398   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2399   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2400     SDValue N00 = N0.getOperand(0);
2401     SDValue N01 = N0.getOperand(1);
2402     SDValue N10 = N1.getOperand(0);
2403     SDValue N11 = N1.getOperand(1);
2404 
2405     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2406       return DAG.getNode(ISD::SUB, DL, VT,
2407                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2408                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2409   }
2410 
2411   // fold (add (umax X, C), -C) --> (usubsat X, C)
2412   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2413     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2414       return (!Max && !Op) ||
2415              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2416     };
2417     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2418                                   /*AllowUndefs*/ true))
2419       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2420                          N0.getOperand(1));
2421   }
2422 
2423   if (SimplifyDemandedBits(SDValue(N, 0)))
2424     return SDValue(N, 0);
2425 
2426   if (isOneOrOneSplat(N1)) {
2427     // fold (add (xor a, -1), 1) -> (sub 0, a)
2428     if (isBitwiseNot(N0))
2429       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2430                          N0.getOperand(0));
2431 
2432     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2433     if (N0.getOpcode() == ISD::ADD ||
2434         N0.getOpcode() == ISD::UADDO ||
2435         N0.getOpcode() == ISD::SADDO) {
2436       SDValue A, Xor;
2437 
2438       if (isBitwiseNot(N0.getOperand(0))) {
2439         A = N0.getOperand(1);
2440         Xor = N0.getOperand(0);
2441       } else if (isBitwiseNot(N0.getOperand(1))) {
2442         A = N0.getOperand(0);
2443         Xor = N0.getOperand(1);
2444       }
2445 
2446       if (Xor)
2447         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2448     }
2449 
2450     // Look for:
2451     //   add (add x, y), 1
2452     // And if the target does not like this form then turn into:
2453     //   sub y, (xor x, -1)
2454     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2455         N0.getOpcode() == ISD::ADD) {
2456       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2457                                 DAG.getAllOnesConstant(DL, VT));
2458       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2459     }
2460   }
2461 
2462   // (x - y) + -1  ->  add (xor y, -1), x
2463   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2464       isAllOnesOrAllOnesSplat(N1)) {
2465     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2466     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2467   }
2468 
2469   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2470     return Combined;
2471 
2472   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2473     return Combined;
2474 
2475   return SDValue();
2476 }
2477 
2478 SDValue DAGCombiner::visitADD(SDNode *N) {
2479   SDValue N0 = N->getOperand(0);
2480   SDValue N1 = N->getOperand(1);
2481   EVT VT = N0.getValueType();
2482   SDLoc DL(N);
2483 
2484   if (SDValue Combined = visitADDLike(N))
2485     return Combined;
2486 
2487   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2488     return V;
2489 
2490   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2491     return V;
2492 
2493   // fold (a+b) -> (a|b) iff a and b share no bits.
2494   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2495       DAG.haveNoCommonBitsSet(N0, N1))
2496     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2497 
2498   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2499   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2500     const APInt &C0 = N0->getConstantOperandAPInt(0);
2501     const APInt &C1 = N1->getConstantOperandAPInt(0);
2502     return DAG.getVScale(DL, VT, C0 + C1);
2503   }
2504 
2505   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2506   if ((N0.getOpcode() == ISD::ADD) &&
2507       (N0.getOperand(1).getOpcode() == ISD::VSCALE) &&
2508       (N1.getOpcode() == ISD::VSCALE)) {
2509     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2510     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2511     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2512     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2513   }
2514 
2515   return SDValue();
2516 }
2517 
2518 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2519   unsigned Opcode = N->getOpcode();
2520   SDValue N0 = N->getOperand(0);
2521   SDValue N1 = N->getOperand(1);
2522   EVT VT = N0.getValueType();
2523   SDLoc DL(N);
2524 
2525   // fold vector ops
2526   if (VT.isVector()) {
2527     // TODO SimplifyVBinOp
2528 
2529     // fold (add_sat x, 0) -> x, vector edition
2530     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2531       return N0;
2532     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2533       return N1;
2534   }
2535 
2536   // fold (add_sat x, undef) -> -1
2537   if (N0.isUndef() || N1.isUndef())
2538     return DAG.getAllOnesConstant(DL, VT);
2539 
2540   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2541     // canonicalize constant to RHS
2542     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2543       return DAG.getNode(Opcode, DL, VT, N1, N0);
2544     // fold (add_sat c1, c2) -> c3
2545     return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1});
2546   }
2547 
2548   // fold (add_sat x, 0) -> x
2549   if (isNullConstant(N1))
2550     return N0;
2551 
2552   // If it cannot overflow, transform into an add.
2553   if (Opcode == ISD::UADDSAT)
2554     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2555       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2556 
2557   return SDValue();
2558 }
2559 
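/// Look through TRUNCATE, ZERO_EXTEND and AND-with-1 nodes for a usable carry
/// value: result 1 of an ADDCARRY/SUBCARRY/UADDO/USUBO node whose boolean
/// content is known to be 0 or 1. Returns a null SDValue otherwise.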
2560 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2561   bool Masked = false;
2562 
2563   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2564   while (true) {
2565     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2566       V = V.getOperand(0);
2567       continue;
2568     }
2569 
2570     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2571       Masked = true;
2572       V = V.getOperand(0);
2573       continue;
2574     }
2575 
2576     break;
2577   }
2578 
2579   // If this is not a carry, return.
2580   if (V.getResNo() != 1)
2581     return SDValue();
2582 
2583   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2584       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2585     return SDValue();
2586 
2587   EVT VT = V.getNode()->getValueType(0);
2588   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2589     return SDValue();
2590 
  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool is known to be
  // either 0 or 1 and never some other value.
2594   if (Masked ||
2595       TLI.getBooleanContents(V.getValueType()) ==
2596           TargetLoweringBase::ZeroOrOneBooleanContent)
2597     return V;
2598 
2599   return SDValue();
2600 }
2601 
2602 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2603 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2604 /// the opcode and bypass the mask operation.
2605 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2606                                  SelectionDAG &DAG, const SDLoc &DL) {
2607   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2608     return SDValue();
2609 
2610   EVT VT = N0.getValueType();
2611   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2612     return SDValue();
2613 
2614   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2615   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2616   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2617 }
2618 
2619 /// Helper for doing combines based on N0 and N1 being added to each other.
2620 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2621                                           SDNode *LocReference) {
2622   EVT VT = N0.getValueType();
2623   SDLoc DL(LocReference);
2624 
2625   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2626   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2627       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2628     return DAG.getNode(ISD::SUB, DL, VT, N0,
2629                        DAG.getNode(ISD::SHL, DL, VT,
2630                                    N1.getOperand(0).getOperand(1),
2631                                    N1.getOperand(1)));
2632 
2633   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2634     return V;
2635 
2636   // Look for:
2637   //   add (add x, 1), y
2638   // And if the target does not like this form then turn into:
2639   //   sub y, (xor x, -1)
2640   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2641       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2642     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2643                               DAG.getAllOnesConstant(DL, VT));
2644     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2645   }
2646 
2647   // Hoist one-use subtraction by non-opaque constant:
2648   //   (x - C) + y  ->  (x + y) - C
2649   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2650   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2651       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2652     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2653     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2654   }
2655   // Hoist one-use subtraction from non-opaque constant:
2656   //   (C - x) + y  ->  (y - x) + C
2657   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2658       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2659     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2660     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2661   }
2662 
2663   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2664   // rather than 'add 0/-1' (the zext should get folded).
2665   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2666   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2667       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2668       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2669     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2670     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2671   }
2672 
2673   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2674   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2675     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2676     if (TN->getVT() == MVT::i1) {
2677       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2678                                  DAG.getConstant(1, DL, VT));
2679       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2680     }
2681   }
2682 
2683   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2684   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2685       N1.getResNo() == 0)
2686     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2687                        N0, N1.getOperand(0), N1.getOperand(2));
2688 
2689   // (add X, Carry) -> (addcarry X, 0, Carry)
2690   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2691     if (SDValue Carry = getAsCarry(TLI, N1))
2692       return DAG.getNode(ISD::ADDCARRY, DL,
2693                          DAG.getVTList(VT, Carry.getValueType()), N0,
2694                          DAG.getConstant(0, DL, VT), Carry);
2695 
2696   return SDValue();
2697 }
2698 
2699 SDValue DAGCombiner::visitADDC(SDNode *N) {
2700   SDValue N0 = N->getOperand(0);
2701   SDValue N1 = N->getOperand(1);
2702   EVT VT = N0.getValueType();
2703   SDLoc DL(N);
2704 
2705   // If the flag result is dead, turn this into an ADD.
2706   if (!N->hasAnyUseOfValue(1))
2707     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2708                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2709 
2710   // canonicalize constant to RHS.
2711   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2712   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2713   if (N0C && !N1C)
2714     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2715 
2716   // fold (addc x, 0) -> x + no carry out
2717   if (isNullConstant(N1))
2718     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2719                                         DL, MVT::Glue));
2720 
2721   // If it cannot overflow, transform into an add.
2722   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2723     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2724                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2725 
2726   return SDValue();
2727 }
2728 
2729 /**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
2731  * then the flip also occurs if computing the inverse is the same cost.
2732  * This function returns an empty SDValue in case it cannot flip the boolean
2733  * without increasing the cost of the computation. If you want to flip a boolean
2734  * no matter what, use DAG.getLogicalNOT.
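 * For example, with ZeroOrOneBooleanContent, (xor X, 1) computes !X, so the
 * flipped boolean is simply X.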
2735  */
2736 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2737                                   const TargetLowering &TLI,
2738                                   bool Force) {
2739   if (Force && isa<ConstantSDNode>(V))
2740     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2741 
2742   if (V.getOpcode() != ISD::XOR)
2743     return SDValue();
2744 
2745   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2746   if (!Const)
2747     return SDValue();
2748 
2749   EVT VT = V.getValueType();
2750 
2751   bool IsFlip = false;
  switch (TLI.getBooleanContents(VT)) {
2753     case TargetLowering::ZeroOrOneBooleanContent:
2754       IsFlip = Const->isOne();
2755       break;
2756     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2757       IsFlip = Const->isAllOnesValue();
2758       break;
2759     case TargetLowering::UndefinedBooleanContent:
2760       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2761       break;
2762   }
2763 
2764   if (IsFlip)
2765     return V.getOperand(0);
2766   if (Force)
2767     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
2768   return SDValue();
2769 }
2770 
2771 SDValue DAGCombiner::visitADDO(SDNode *N) {
2772   SDValue N0 = N->getOperand(0);
2773   SDValue N1 = N->getOperand(1);
2774   EVT VT = N0.getValueType();
2775   bool IsSigned = (ISD::SADDO == N->getOpcode());
2776 
2777   EVT CarryVT = N->getValueType(1);
2778   SDLoc DL(N);
2779 
2780   // If the flag result is dead, turn this into an ADD.
2781   if (!N->hasAnyUseOfValue(1))
2782     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2783                      DAG.getUNDEF(CarryVT));
2784 
2785   // canonicalize constant to RHS.
2786   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2787       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2788     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2789 
2790   // fold (addo x, 0) -> x + no carry out
2791   if (isNullOrNullSplat(N1))
2792     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2793 
2794   if (!IsSigned) {
2795     // If it cannot overflow, transform into an add.
2796     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2797       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2798                        DAG.getConstant(0, DL, CarryVT));
2799 
2800     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
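    // This holds because ~a + 1 == 0 - a: the addition overflows only when
    // a == 0 (~a is then all-ones), while the subtraction borrows for every
    // a != 0, hence the flipped carry.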
2801     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2802       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2803                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2804       return CombineTo(
2805           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
2806     }
2807 
2808     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2809       return Combined;
2810 
2811     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2812       return Combined;
2813   }
2814 
2815   return SDValue();
2816 }
2817 
2818 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2819   EVT VT = N0.getValueType();
2820   if (VT.isVector())
2821     return SDValue();
2822 
2823   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2824   // If Y + 1 cannot overflow.
2825   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2826     SDValue Y = N1.getOperand(0);
2827     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2828     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2829       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2830                          N1.getOperand(2));
2831   }
2832 
2833   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2834   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2835     if (SDValue Carry = getAsCarry(TLI, N1))
2836       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2837                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2838 
2839   return SDValue();
2840 }
2841 
2842 SDValue DAGCombiner::visitADDE(SDNode *N) {
2843   SDValue N0 = N->getOperand(0);
2844   SDValue N1 = N->getOperand(1);
2845   SDValue CarryIn = N->getOperand(2);
2846 
2847   // canonicalize constant to RHS
2848   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2849   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2850   if (N0C && !N1C)
2851     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2852                        N1, N0, CarryIn);
2853 
2854   // fold (adde x, y, false) -> (addc x, y)
2855   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2856     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2857 
2858   return SDValue();
2859 }
2860 
2861 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2862   SDValue N0 = N->getOperand(0);
2863   SDValue N1 = N->getOperand(1);
2864   SDValue CarryIn = N->getOperand(2);
2865   SDLoc DL(N);
2866 
2867   // canonicalize constant to RHS
2868   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2869   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2870   if (N0C && !N1C)
2871     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2872 
2873   // fold (addcarry x, y, false) -> (uaddo x, y)
2874   if (isNullConstant(CarryIn)) {
2875     if (!LegalOperations ||
2876         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2877       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2878   }
2879 
2880   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2881   if (isNullConstant(N0) && isNullConstant(N1)) {
2882     EVT VT = N0.getValueType();
2883     EVT CarryVT = CarryIn.getValueType();
2884     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2885     AddToWorklist(CarryExt.getNode());
2886     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2887                                     DAG.getConstant(1, DL, VT)),
2888                      DAG.getConstant(0, DL, CarryVT));
2889   }
2890 
2891   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2892     return Combined;
2893 
2894   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2895     return Combined;
2896 
2897   return SDValue();
2898 }
2899 
2900 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
2901   SDValue N0 = N->getOperand(0);
2902   SDValue N1 = N->getOperand(1);
2903   SDValue CarryIn = N->getOperand(2);
2904   SDLoc DL(N);
2905 
2906   // canonicalize constant to RHS
2907   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2908   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2909   if (N0C && !N1C)
2910     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
2911 
2912   // fold (saddo_carry x, y, false) -> (saddo x, y)
2913   if (isNullConstant(CarryIn)) {
2914     if (!LegalOperations ||
2915         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
2916       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
2917   }
2918 
2919   return SDValue();
2920 }
2921 
2922 /**
 * If we are facing some sort of diamond carry propagation pattern, try to
 * break it up to generate something like:
 *   (addcarry X, 0, (addcarry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
2929  *
2930  * Patterns typically look something like
2931  *            (uaddo A, B)
2932  *             /       \
2933  *          Carry      Sum
2934  *            |          \
2935  *            | (addcarry *, 0, Z)
2936  *            |       /
2937  *             \   Carry
2938  *              |   /
2939  * (addcarry X, *, *)
2940  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
2942  * produce a combine with a single path for carry propagation.
2943  */
2944 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2945                                       SDValue X, SDValue Carry0, SDValue Carry1,
2946                                       SDNode *N) {
2947   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2948     return SDValue();
2949   if (Carry1.getOpcode() != ISD::UADDO)
2950     return SDValue();
2951 
2952   SDValue Z;
2953 
2954   /**
2955    * First look for a suitable Z. It will present itself in the form of
2956    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2957    */
2958   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2959       isNullConstant(Carry0.getOperand(1))) {
2960     Z = Carry0.getOperand(2);
2961   } else if (Carry0.getOpcode() == ISD::UADDO &&
2962              isOneConstant(Carry0.getOperand(1))) {
2963     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2964     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2965   } else {
2966     // We couldn't find a suitable Z.
2967     return SDValue();
2968   }
2969 
2970 
  auto cancelDiamond = [&](SDValue A, SDValue B) {
2972     SDLoc DL(N);
2973     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2974     Combiner.AddToWorklist(NewY.getNode());
2975     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2976                        DAG.getConstant(0, DL, X.getValueType()),
2977                        NewY.getValue(1));
2978   };
2979 
2980   /**
2981    *      (uaddo A, B)
2982    *           |
2983    *          Sum
2984    *           |
2985    * (addcarry *, 0, Z)
2986    */
2987   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2988     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2989   }
2990 
2991   /**
2992    * (addcarry A, 0, Z)
2993    *         |
2994    *        Sum
2995    *         |
2996    *  (uaddo *, B)
2997    */
2998   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2999     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3000   }
3001 
3002   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3003     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3004   }
3005 
3006   return SDValue();
3007 }
3008 
3009 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3010 // match patterns like:
3011 //
3012 //          (uaddo A, B)            CarryIn
3013 //            |  \                     |
3014 //            |   \                    |
3015 //    PartialSum   PartialCarryOutX   /
3016 //            |        |             /
3017 //            |    ____|____________/
3018 //            |   /    |
3019 //     (uaddo *, *)    \________
3020 //       |  \                   \
3021 //       |   \                   |
3022 //       |    PartialCarryOutY   |
3023 //       |        \              |
3024 //       |         \            /
3025 //   AddCarrySum    |    ______/
3026 //                  |   /
3027 //   CarryOut = (or *, *)
3028 //
3029 // And generate ADDCARRY (or SUBCARRY) with two result values:
3030 //
3031 //    {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
3032 //
3033 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
3034 // a single path for carry/borrow out propagation:
3035 static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
3036                                    const TargetLowering &TLI, SDValue Carry0,
3037                                    SDValue Carry1, SDNode *N) {
3038   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
3039     return SDValue();
3040   unsigned Opcode = Carry0.getOpcode();
3041   if (Opcode != Carry1.getOpcode())
3042     return SDValue();
3043   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3044     return SDValue();
3045 
3046   // Canonicalize the add/sub of A and B as Carry0 and the add/sub of the
3047   // carry/borrow in as Carry1. (The top and middle uaddo nodes respectively in
3048   // the above ASCII art.)
3049   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3050       Carry1.getOperand(1) != Carry0.getValue(0))
3051     std::swap(Carry0, Carry1);
3052   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3053       Carry1.getOperand(1) != Carry0.getValue(0))
3054     return SDValue();
3055 
  // The carry in value must be on the right-hand side for subtraction.
3057   unsigned CarryInOperandNum =
3058       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3059   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3060     return SDValue();
3061   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3062 
3063   unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
3064   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3065     return SDValue();
3066 
3067   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3068   // TODO: make getAsCarry() aware of how partial carries are merged.
3069   if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
3070     return SDValue();
3071   CarryIn = CarryIn.getOperand(0);
3072   if (CarryIn.getValueType() != MVT::i1)
3073     return SDValue();
3074 
3075   SDLoc DL(N);
3076   SDValue Merged =
3077       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3078                   Carry0.getOperand(1), CarryIn);
3079 
  // Because we have proven that the result of the UADDO/USUBO of A and B
  // feeds into the UADDO/USUBO that consumes the carry/borrow in, we know
  // that if the first UADDO/USUBO overflows, the second one cannot. For
  // example, consider 8-bit numbers where 0xFF is the maximum value.
3085   //
3086   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3087   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3088   //
3089   // This is important because it means that OR and XOR can be used to merge
3090   // carry flags; and that AND can return a constant zero.
3091   //
3092   // TODO: match other operations that can merge flags (ADD, etc)
3093   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3094   if (N->getOpcode() == ISD::AND)
3095     return DAG.getConstant(0, DL, MVT::i1);
3096   return Merged.getValue(1);
3097 }
3098 
3099 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3100                                        SDNode *N) {
3101   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
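  // This holds because ~a + b + c == b - a - (1 - c), i.e. a subtraction of
  // a from b with borrow !c.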
3102   if (isBitwiseNot(N0))
3103     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3104       SDLoc DL(N);
3105       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3106                                 N0.getOperand(0), NotC);
3107       return CombineTo(
3108           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3109     }
3110 
3111   // Iff the flag result is dead:
3112   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3113   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3114   // or the dependency between the instructions.
3115   if ((N0.getOpcode() == ISD::ADD ||
3116        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3117         N0.getValue(1) != CarryIn)) &&
3118       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3119     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3120                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3121 
3122   /**
3123    * When one of the addcarry argument is itself a carry, we may be facing
3124    * a diamond carry propagation. In which case we try to transform the DAG
3125    * to ensure linear carry propagation if that is possible.
3126    */
3127   if (auto Y = getAsCarry(TLI, N1)) {
3128     // Because both are carries, Y and Z can be swapped.
3129     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3130       return R;
3131     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3132       return R;
3133   }
3134 
3135   return SDValue();
3136 }
3137 
// Since it may not be valid to emit a fold to zero for vector initializers,
// check whether we can before folding.
3140 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3141                              SelectionDAG &DAG, bool LegalOperations) {
3142   if (!VT.isVector())
3143     return DAG.getConstant(0, DL, VT);
3144   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3145     return DAG.getConstant(0, DL, VT);
3146   return SDValue();
3147 }
3148 
3149 SDValue DAGCombiner::visitSUB(SDNode *N) {
3150   SDValue N0 = N->getOperand(0);
3151   SDValue N1 = N->getOperand(1);
3152   EVT VT = N0.getValueType();
3153   SDLoc DL(N);
3154 
3155   // fold vector ops
3156   if (VT.isVector()) {
3157     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3158       return FoldedVOp;
3159 
3160     // fold (sub x, 0) -> x, vector edition
3161     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3162       return N0;
3163   }
3164 
3165   // fold (sub x, x) -> 0
3166   // FIXME: Refactor this and xor and other similar operations together.
3167   if (N0 == N1)
3168     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3169 
3170   // fold (sub c1, c2) -> c3
3171   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3172     return C;
3173 
3174   if (SDValue NewSel = foldBinOpIntoSelect(N))
3175     return NewSel;
3176 
3177   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3178 
3179   // fold (sub x, c) -> (add x, -c)
3180   if (N1C) {
3181     return DAG.getNode(ISD::ADD, DL, VT, N0,
3182                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3183   }
3184 
3185   if (isNullOrNullSplat(N0)) {
3186     unsigned BitWidth = VT.getScalarSizeInBits();
3187     // Right-shifting everything out but the sign bit followed by negation is
3188     // the same as flipping arithmetic/logical shift type without the negation:
3189     // -(X >>u 31) -> (X >>s 31)
3190     // -(X >>s 31) -> (X >>u 31)
3191     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3192       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3193       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3194         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3195         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3196           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3197       }
3198     }
3199 
3200     // 0 - X --> 0 if the sub is NUW.
3201     if (N->getFlags().hasNoUnsignedWrap())
3202       return N0;
3203 
3204     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3205       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3206       // N1 must be 0 because negating the minimum signed value is undefined.
3207       if (N->getFlags().hasNoSignedWrap())
3208         return N0;
3209 
3210       // 0 - X --> X if X is 0 or the minimum signed value.
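      // e.g. for i8, N1 is either 0 or 0x80, and 0 - 0x80 wraps back to
      // 0x80, so returning N1 is correct in both cases.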
3211       return N1;
3212     }
3213 
    // Convert 0 - abs(x) when ABS is not legal/custom but can be expanded
    // with the negation folded in.
3215     SDValue Result;
3216     if (N1->getOpcode() == ISD::ABS &&
3217         !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
3218         TLI.expandABS(N1.getNode(), Result, DAG, true))
3219       return Result;
3220   }
3221 
3222   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3223   if (isAllOnesOrAllOnesSplat(N0))
3224     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3225 
3226   // fold (A - (0-B)) -> A+B
3227   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3228     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3229 
3230   // fold A-(A-B) -> B
3231   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3232     return N1.getOperand(1);
3233 
3234   // fold (A+B)-A -> B
3235   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3236     return N0.getOperand(1);
3237 
3238   // fold (A+B)-B -> A
3239   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3240     return N0.getOperand(0);
3241 
3242   // fold (A+C1)-C2 -> A+(C1-C2)
3243   if (N0.getOpcode() == ISD::ADD &&
3244       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3245       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3246     SDValue NewC =
3247         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1});
3248     assert(NewC && "Constant folding failed");
3249     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3250   }
3251 
3252   // fold C2-(A+C1) -> (C2-C1)-A
3253   if (N1.getOpcode() == ISD::ADD) {
3254     SDValue N11 = N1.getOperand(1);
3255     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3256         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3257       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11});
3258       assert(NewC && "Constant folding failed");
3259       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3260     }
3261   }
3262 
3263   // fold (A-C1)-C2 -> A-(C1+C2)
3264   if (N0.getOpcode() == ISD::SUB &&
3265       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3266       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3267     SDValue NewC =
3268         DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1});
3269     assert(NewC && "Constant folding failed");
3270     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3271   }
3272 
3273   // fold (c1-A)-c2 -> (c1-c2)-A
3274   if (N0.getOpcode() == ISD::SUB &&
3275       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3276       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3277     SDValue NewC =
3278         DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(0), N1});
3279     assert(NewC && "Constant folding failed");
3280     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3281   }
3282 
3283   // fold ((A+(B+or-C))-B) -> A+or-C
3284   if (N0.getOpcode() == ISD::ADD &&
3285       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3286        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3287       N0.getOperand(1).getOperand(0) == N1)
3288     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3289                        N0.getOperand(1).getOperand(1));
3290 
3291   // fold ((A+(C+B))-B) -> A+C
3292   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3293       N0.getOperand(1).getOperand(1) == N1)
3294     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3295                        N0.getOperand(1).getOperand(0));
3296 
3297   // fold ((A-(B-C))-C) -> A-B
3298   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3299       N0.getOperand(1).getOperand(1) == N1)
3300     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3301                        N0.getOperand(1).getOperand(0));
3302 
3303   // fold (A-(B-C)) -> A+(C-B)
3304   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3305     return DAG.getNode(ISD::ADD, DL, VT, N0,
3306                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3307                                    N1.getOperand(0)));
3308 
3309   // A - (A & B)  ->  A & (~B)
3310   if (N1.getOpcode() == ISD::AND) {
3311     SDValue A = N1.getOperand(0);
3312     SDValue B = N1.getOperand(1);
3313     if (A != N0)
3314       std::swap(A, B);
3315     if (A == N0 &&
3316         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3317       SDValue InvB =
3318           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3319       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3320     }
3321   }
3322 
3323   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3324   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3325     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3326         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3327       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3328                                 N1.getOperand(0).getOperand(1),
3329                                 N1.getOperand(1));
3330       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3331     }
3332     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3333         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3334       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3335                                 N1.getOperand(0),
3336                                 N1.getOperand(1).getOperand(1));
3337       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3338     }
3339   }
3340 
3341   // If either operand of a sub is undef, the result is undef
3342   if (N0.isUndef())
3343     return N0;
3344   if (N1.isUndef())
3345     return N1;
3346 
3347   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3348     return V;
3349 
3350   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3351     return V;
3352 
3353   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3354     return V;
3355 
3356   // (x - y) - 1  ->  add (xor y, -1), x
3357   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3358     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3359                               DAG.getAllOnesConstant(DL, VT));
3360     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3361   }
3362 
3363   // Look for:
3364   //   sub y, (xor x, -1)
3365   // And if the target does not like this form then turn into:
3366   //   add (add x, y), 1
3367   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3368     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3369     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3370   }
3371 
3372   // Hoist one-use addition by non-opaque constant:
3373   //   (x + C) - y  ->  (x - y) + C
3374   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3375       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3376     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3377     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3378   }
3379   // y - (x + C)  ->  (y - x) - C
3380   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3381       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3382     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3383     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3384   }
3385   // (x - C) - y  ->  (x - y) - C
3386   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3387   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3388       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3389     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3390     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3391   }
3392   // (C - x) - y  ->  C - (x + y)
3393   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3394       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3395     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3396     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3397   }
3398 
3399   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3400   // rather than 'sub 0/1' (the sext should get folded).
3401   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3402   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3403       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3404       TLI.getBooleanContents(VT) ==
3405           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3406     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3407     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3408   }
3409 
3410   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
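  // This is the classic branchless abs: Y = X >>s (bits-1) is 0 or -1, so
  // (X ^ Y) conditionally complements X and subtracting Y adds back 1 for
  // negative values.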
3411   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3412     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3413       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3414       SDValue S0 = N1.getOperand(0);
3415       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
3416         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3417           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
3418             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3419     }
3420   }
3421 
3422   // If the relocation model supports it, consider symbol offsets.
3423   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3424     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3425       // fold (sub Sym, c) -> Sym-c
3426       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3427         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3428                                     GA->getOffset() -
3429                                         (uint64_t)N1C->getSExtValue());
3430       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3431       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3432         if (GA->getGlobal() == GB->getGlobal())
3433           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3434                                  DL, VT);
3435     }
3436 
3437   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3438   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3439     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3440     if (TN->getVT() == MVT::i1) {
3441       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3442                                  DAG.getConstant(1, DL, VT));
3443       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3444     }
3445   }
3446 
  // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
3448   if (N1.getOpcode() == ISD::VSCALE) {
3449     const APInt &IntVal = N1.getConstantOperandAPInt(0);
3450     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
3451   }
3452 
3453   // Prefer an add for more folding potential and possibly better codegen:
3454   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
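  // This is sound because (lshr N10, width-1) is 0 or 1 while
  // (ashr N10, width-1) is 0 or -1, and subtracting 0/1 equals adding 0/-1.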
3455   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3456     SDValue ShAmt = N1.getOperand(1);
3457     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3458     if (ShAmtC &&
3459         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3460       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3461       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3462     }
3463   }
3464 
3465   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3466     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3467     if (SDValue Carry = getAsCarry(TLI, N0)) {
3468       SDValue X = N1;
3469       SDValue Zero = DAG.getConstant(0, DL, VT);
3470       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3471       return DAG.getNode(ISD::ADDCARRY, DL,
3472                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3473                          Carry);
3474     }
3475   }
3476 
3477   return SDValue();
3478 }
3479 
3480 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3481   SDValue N0 = N->getOperand(0);
3482   SDValue N1 = N->getOperand(1);
3483   EVT VT = N0.getValueType();
3484   SDLoc DL(N);
3485 
3486   // fold vector ops
3487   if (VT.isVector()) {
3488     // TODO SimplifyVBinOp
3489 
3490     // fold (sub_sat x, 0) -> x, vector edition
3491     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3492       return N0;
3493   }
3494 
3495   // fold (sub_sat x, undef) -> 0
3496   if (N0.isUndef() || N1.isUndef())
3497     return DAG.getConstant(0, DL, VT);
3498 
3499   // fold (sub_sat x, x) -> 0
3500   if (N0 == N1)
3501     return DAG.getConstant(0, DL, VT);
3502 
3503   // fold (sub_sat c1, c2) -> c3
3504   if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
3505     return C;
3506 
3507   // fold (sub_sat x, 0) -> x
3508   if (isNullConstant(N1))
3509     return N0;
3510 
3511   return SDValue();
3512 }
3513 
3514 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3515   SDValue N0 = N->getOperand(0);
3516   SDValue N1 = N->getOperand(1);
3517   EVT VT = N0.getValueType();
3518   SDLoc DL(N);
3519 
  // If the flag result is dead, turn this into a SUB.
3521   if (!N->hasAnyUseOfValue(1))
3522     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3523                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3524 
3525   // fold (subc x, x) -> 0 + no borrow
3526   if (N0 == N1)
3527     return CombineTo(N, DAG.getConstant(0, DL, VT),
3528                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3529 
3530   // fold (subc x, 0) -> x + no borrow
3531   if (isNullConstant(N1))
3532     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3533 
3534   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3535   if (isAllOnesConstant(N0))
3536     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3537                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3538 
3539   return SDValue();
3540 }
3541 
3542 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3543   SDValue N0 = N->getOperand(0);
3544   SDValue N1 = N->getOperand(1);
3545   EVT VT = N0.getValueType();
3546   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3547 
3548   EVT CarryVT = N->getValueType(1);
3549   SDLoc DL(N);
3550 
  // If the flag result is dead, turn this into a SUB.
3552   if (!N->hasAnyUseOfValue(1))
3553     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3554                      DAG.getUNDEF(CarryVT));
3555 
3556   // fold (subo x, x) -> 0 + no borrow
3557   if (N0 == N1)
3558     return CombineTo(N, DAG.getConstant(0, DL, VT),
3559                      DAG.getConstant(0, DL, CarryVT));
3560 
3561   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3562 
  // fold (subo x, c) -> (addo x, -c)
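  // Note that c == INT_MIN is excluded: negating it is itself an overflow,
  // which would make the transform invalid.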
3564   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3565     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3566                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3567   }
3568 
3569   // fold (subo x, 0) -> x + no borrow
3570   if (isNullOrNullSplat(N1))
3571     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3572 
3573   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3574   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3575     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3576                      DAG.getConstant(0, DL, CarryVT));
3577 
3578   return SDValue();
3579 }
3580 
3581 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3582   SDValue N0 = N->getOperand(0);
3583   SDValue N1 = N->getOperand(1);
3584   SDValue CarryIn = N->getOperand(2);
3585 
3586   // fold (sube x, y, false) -> (subc x, y)
3587   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3588     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3589 
3590   return SDValue();
3591 }
3592 
3593 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3594   SDValue N0 = N->getOperand(0);
3595   SDValue N1 = N->getOperand(1);
3596   SDValue CarryIn = N->getOperand(2);
3597 
3598   // fold (subcarry x, y, false) -> (usubo x, y)
3599   if (isNullConstant(CarryIn)) {
3600     if (!LegalOperations ||
3601         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3602       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3603   }
3604 
3605   return SDValue();
3606 }
3607 
3608 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
3609   SDValue N0 = N->getOperand(0);
3610   SDValue N1 = N->getOperand(1);
3611   SDValue CarryIn = N->getOperand(2);
3612 
3613   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
3614   if (isNullConstant(CarryIn)) {
3615     if (!LegalOperations ||
3616         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
3617       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
3618   }
3619 
3620   return SDValue();
3621 }
3622 
3623 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3624 // UMULFIXSAT here.
3625 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3626   SDValue N0 = N->getOperand(0);
3627   SDValue N1 = N->getOperand(1);
3628   SDValue Scale = N->getOperand(2);
3629   EVT VT = N0.getValueType();
3630 
3631   // fold (mulfix x, undef, scale) -> 0
3632   if (N0.isUndef() || N1.isUndef())
3633     return DAG.getConstant(0, SDLoc(N), VT);
3634 
3635   // Canonicalize constant to RHS (vector doesn't have to splat)
3636   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3637      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3638     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3639 
3640   // fold (mulfix x, 0, scale) -> 0
3641   if (isNullConstant(N1))
3642     return DAG.getConstant(0, SDLoc(N), VT);
3643 
3644   return SDValue();
3645 }
3646 
3647 SDValue DAGCombiner::visitMUL(SDNode *N) {
3648   SDValue N0 = N->getOperand(0);
3649   SDValue N1 = N->getOperand(1);
3650   EVT VT = N0.getValueType();
3651 
3652   // fold (mul x, undef) -> 0
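  // (an undef operand may be chosen to be zero, and x * 0 == 0)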
3653   if (N0.isUndef() || N1.isUndef())
3654     return DAG.getConstant(0, SDLoc(N), VT);
3655 
3656   bool N1IsConst = false;
3657   bool N1IsOpaqueConst = false;
3658   APInt ConstValue1;
3659 
3660   // fold vector ops
3661   if (VT.isVector()) {
3662     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3663       return FoldedVOp;
3664 
3665     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3666     assert((!N1IsConst ||
3667             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3668            "Splat APInt should be element width");
3669   } else {
3670     N1IsConst = isa<ConstantSDNode>(N1);
3671     if (N1IsConst) {
3672       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3673       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3674     }
3675   }
3676 
3677   // fold (mul c1, c2) -> c1*c2
3678   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1}))
3679     return C;
3680 
3681   // canonicalize constant to RHS (vector doesn't have to splat)
3682   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3683      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3684     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3685 
3686   // fold (mul x, 0) -> 0
3687   if (N1IsConst && ConstValue1.isNullValue())
3688     return N1;
3689 
3690   // fold (mul x, 1) -> x
3691   if (N1IsConst && ConstValue1.isOneValue())
3692     return N0;
3693 
3694   if (SDValue NewSel = foldBinOpIntoSelect(N))
3695     return NewSel;
3696 
3697   // fold (mul x, -1) -> 0-x
3698   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3699     SDLoc DL(N);
3700     return DAG.getNode(ISD::SUB, DL, VT,
3701                        DAG.getConstant(0, DL, VT), N0);
3702   }
3703 
3704   // fold (mul x, (1 << c)) -> x << c
3705   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3706       DAG.isKnownToBeAPowerOfTwo(N1) &&
3707       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3708     SDLoc DL(N);
3709     SDValue LogBase2 = BuildLogBase2(N1, DL);
3710     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3711     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3712     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3713   }
3714 
3715   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3716   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3717     unsigned Log2Val = (-ConstValue1).logBase2();
3718     SDLoc DL(N);
3719     // FIXME: If the input is something that is easily negated (e.g. a
3720     // single-use add), we should put the negate there.
3721     return DAG.getNode(ISD::SUB, DL, VT,
3722                        DAG.getConstant(0, DL, VT),
3723                        DAG.getNode(ISD::SHL, DL, VT, N0,
3724                             DAG.getConstant(Log2Val, DL,
3725                                       getShiftAmountTy(N0.getValueType()))));
3726   }
3727 
3728   // Try to transform:
3729   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3730   // mul x, (2^N + 1) --> add (shl x, N), x
3731   // mul x, (2^N - 1) --> sub (shl x, N), x
3732   // Examples: x * 33 --> (x << 5) + x
3733   //           x * 15 --> (x << 4) - x
3734   //           x * -33 --> -((x << 5) + x)
3735   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3736   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
3737   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
3738   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
3739   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
3740   //           x * 0xf800 --> (x << 16) - (x << 11)
3741   //           x * -0x8800 --> -((x << 15) + (x << 11))
3742   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
3743   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3744     // TODO: We could handle more general decomposition of any constant by
3745     //       having the target set a limit on number of ops and making a
3746     //       callback to determine that sequence (similar to sqrt expansion).
3747     unsigned MathOp = ISD::DELETED_NODE;
3748     APInt MulC = ConstValue1.abs();
3749     // The constant `2` should be treated as (2^0 + 1).
3750     unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
3751     MulC.lshrInPlace(TZeros);
3752     if ((MulC - 1).isPowerOf2())
3753       MathOp = ISD::ADD;
3754     else if ((MulC + 1).isPowerOf2())
3755       MathOp = ISD::SUB;
3756 
3757     if (MathOp != ISD::DELETED_NODE) {
3758       unsigned ShAmt =
3759           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3760       ShAmt += TZeros;
3761       assert(ShAmt < VT.getScalarSizeInBits() &&
3762              "multiply-by-constant generated out of bounds shift");
3763       SDLoc DL(N);
3764       SDValue Shl =
3765           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3766       SDValue R =
3767           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
3768                                DAG.getNode(ISD::SHL, DL, VT, N0,
3769                                            DAG.getConstant(TZeros, DL, VT)))
3770                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
3771       if (ConstValue1.isNegative())
3772         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3773       return R;
3774     }
3775   }
3776 
3777   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3778   if (N0.getOpcode() == ISD::SHL &&
3779       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3780       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3781     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3782     if (isConstantOrConstantVector(C3))
3783       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3784   }
3785 
3786   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3787   // use.
3788   {
3789     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3790 
3791     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3792     if (N0.getOpcode() == ISD::SHL &&
3793         isConstantOrConstantVector(N0.getOperand(1)) &&
3794         N0.getNode()->hasOneUse()) {
3795       Sh = N0; Y = N1;
3796     } else if (N1.getOpcode() == ISD::SHL &&
3797                isConstantOrConstantVector(N1.getOperand(1)) &&
3798                N1.getNode()->hasOneUse()) {
3799       Sh = N1; Y = N0;
3800     }
3801 
3802     if (Sh.getNode()) {
3803       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3804       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3805     }
3806   }
3807 
3808   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3809   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3810       N0.getOpcode() == ISD::ADD &&
3811       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3812       isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));
3818 
3819   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
3820   if (N0.getOpcode() == ISD::VSCALE)
3821     if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) {
3822       const APInt &C0 = N0.getConstantOperandAPInt(0);
3823       const APInt &C1 = NC1->getAPIntValue();
3824       return DAG.getVScale(SDLoc(N), VT, C0 * C1);
3825     }
3826 
  // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
  // into and(x, mask).
  // We can replace vectors with '0' and '1' factors with a clearing mask.
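  // e.g. (mul X, <0,1,undef,1>) --> (and X, <0,-1,0,-1>)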
3831   if (VT.isFixedLengthVector()) {
3832     unsigned NumElts = VT.getVectorNumElements();
3833     SmallBitVector ClearMask;
3834     ClearMask.reserve(NumElts);
3835     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
3836       if (!V || V->isNullValue()) {
3837         ClearMask.push_back(true);
3838         return true;
3839       }
3840       ClearMask.push_back(false);
3841       return V->isOne();
3842     };
3843     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
3844         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
3845       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
3846       SDLoc DL(N);
3847       EVT LegalSVT = N1.getOperand(0).getValueType();
3848       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
3849       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
3850       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
3851       for (unsigned I = 0; I != NumElts; ++I)
3852         if (ClearMask[I])
3853           Mask[I] = Zero;
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getBuildVector(VT, DL, Mask));
3855     }
3856   }
3857 
3858   // reassociate mul
3859   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3860     return RMUL;
3861 
3862   return SDValue();
3863 }
3864 
/// Return true if a divmod libcall is available.
3866 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3867                                      const TargetLowering &TLI) {
3868   RTLIB::Libcall LC;
3869   EVT NodeType = Node->getValueType(0);
3870   if (!NodeType.isSimple())
3871     return false;
3872   switch (NodeType.getSimpleVT().SimpleTy) {
3873   default: return false; // No libcall for vector types.
3874   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3875   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3876   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3877   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3878   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3879   }
3880 
3881   return TLI.getLibcallName(LC) != nullptr;
3882 }
3883 
3884 /// Issue divrem if both quotient and remainder are needed.
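/// For example, if both (sdiv X, Y) and (srem X, Y) are live, a single
/// (sdivrem X, Y) node can feed the users of both values.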
3885 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3886   if (Node->use_empty())
3887     return SDValue(); // This is a dead node, leave it alone.
3888 
3889   unsigned Opcode = Node->getOpcode();
3890   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3891   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3892 
3893   // DivMod lib calls can still work on non-legal types if using lib-calls.
3894   EVT VT = Node->getValueType(0);
3895   if (VT.isVector() || !VT.isInteger())
3896     return SDValue();
3897 
3898   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3899     return SDValue();
3900 
3901   // If DIVREM is going to get expanded into a libcall,
3902   // but there is no libcall available, then don't combine.
3903   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3904       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3905     return SDValue();
3906 
3907   // If div is legal, it's better to do the normal expansion
3908   unsigned OtherOpcode = 0;
3909   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3910     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3911     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3912       return SDValue();
3913   } else {
3914     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3915     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3916       return SDValue();
3917   }
3918 
3919   SDValue Op0 = Node->getOperand(0);
3920   SDValue Op1 = Node->getOperand(1);
3921   SDValue combined;
3922   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3923          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3924     SDNode *User = *UI;
3925     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3926         User->use_empty())
3927       continue;
3928     // Convert the other matching node(s), too;
3929     // otherwise, the DIVREM may get target-legalized into something
3930     // target-specific that we won't be able to recognize.
3931     unsigned UserOpc = User->getOpcode();
3932     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3933         User->getOperand(0) == Op0 &&
3934         User->getOperand(1) == Op1) {
3935       if (!combined) {
3936         if (UserOpc == OtherOpcode) {
3937           SDVTList VTs = DAG.getVTList(VT, VT);
3938           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3939         } else if (UserOpc == DivRemOpc) {
3940           combined = SDValue(User, 0);
3941         } else {
3942           assert(UserOpc == Opcode);
3943           continue;
3944         }
3945       }
3946       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3947         CombineTo(User, combined);
3948       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3949         CombineTo(User, combined.getValue(1));
3950     }
3951   }
3952   return combined;
3953 }
3954 
3955 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3956   SDValue N0 = N->getOperand(0);
3957   SDValue N1 = N->getOperand(1);
3958   EVT VT = N->getValueType(0);
3959   SDLoc DL(N);
3960 
3961   unsigned Opc = N->getOpcode();
3962   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3963   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3964 
3965   // X / undef -> undef
3966   // X % undef -> undef
3967   // X / 0 -> undef
3968   // X % 0 -> undef
3969   // NOTE: This includes vectors where any divisor element is zero/undef.
3970   if (DAG.isUndef(Opc, {N0, N1}))
3971     return DAG.getUNDEF(VT);
3972 
3973   // undef / X -> 0
3974   // undef % X -> 0
3975   if (N0.isUndef())
3976     return DAG.getConstant(0, DL, VT);
3977 
3978   // 0 / X -> 0
3979   // 0 % X -> 0
3980   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3981   if (N0C && N0C->isNullValue())
3982     return N0;
3983 
3984   // X / X -> 1
3985   // X % X -> 0
3986   if (N0 == N1)
3987     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3988 
3989   // X / 1 -> X
3990   // X % 1 -> 0
3991   // If this is a boolean op (single-bit element type), we can't have
3992   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3993   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3994   // it's a 1.
3995   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3996     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3997 
3998   return SDValue();
3999 }
4000 
4001 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4002   SDValue N0 = N->getOperand(0);
4003   SDValue N1 = N->getOperand(1);
4004   EVT VT = N->getValueType(0);
4005   EVT CCVT = getSetCCResultType(VT);
4006 
4007   // fold vector ops
4008   if (VT.isVector())
4009     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4010       return FoldedVOp;
4011 
4012   SDLoc DL(N);
4013 
4014   // fold (sdiv c1, c2) -> c1/c2
4015   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4016   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4017     return C;
4018 
4019   // fold (sdiv X, -1) -> 0-X
4020   if (N1C && N1C->isAllOnesValue())
4021     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
4022 
4023   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4024   if (N1C && N1C->getAPIntValue().isMinSignedValue())
4025     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4026                          DAG.getConstant(1, DL, VT),
4027                          DAG.getConstant(0, DL, VT));
4028 
4029   if (SDValue V = simplifyDivRem(N, DAG))
4030     return V;
4031 
4032   if (SDValue NewSel = foldBinOpIntoSelect(N))
4033     return NewSel;
4034 
4035   // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4037   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4038     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4039 
4040   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4041     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4043     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4044                                               { N0, N1 })) {
4045       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4046       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4047       AddToWorklist(Mul.getNode());
4048       AddToWorklist(Sub.getNode());
4049       CombineTo(RemNode, Sub);
4050     }
4051     return V;
4052   }
4053 
4054   // sdiv, srem -> sdivrem
4055   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4056   // true.  Otherwise, we break the simplification logic in visitREM().
4057   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4058   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4059     if (SDValue DivRem = useDivRem(N))
      return DivRem;
4061 
4062   return SDValue();
4063 }
4064 
4065 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4066   SDLoc DL(N);
4067   EVT VT = N->getValueType(0);
4068   EVT CCVT = getSetCCResultType(VT);
4069   unsigned BitWidth = VT.getScalarSizeInBits();
4070 
  // Helper for determining whether a value is a power-of-2 constant scalar,
  // or a vector of such elements; negated powers of 2 are also accepted.
4073   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4074     if (C->isNullValue() || C->isOpaque())
4075       return false;
4076     if (C->getAPIntValue().isPowerOf2())
4077       return true;
4078     if ((-C->getAPIntValue()).isPowerOf2())
4079       return true;
4080     return false;
4081   };
4082 
4083   // fold (sdiv X, pow2) -> simple ops after legalize
4084   // FIXME: We check for the exact bit here because the generic lowering gives
4085   // better results in that case. The target-specific lowering should learn how
4086   // to handle exact sdivs efficiently.
4087   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
4088     // Target-specific implementation of sdiv x, pow2.
4089     if (SDValue Res = BuildSDIVPow2(N))
4090       return Res;
4091 
4092     // Create constants that are functions of the shift amount value.
4093     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4094     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4095     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4096     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4097     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4098     if (!isConstantOrConstantVector(Inexact))
4099       return SDValue();
4100 
4101     // Splat the sign bit into the register
4102     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4103                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4104     AddToWorklist(Sign.getNode());
4105 
    // Add (N0 < 0) ? abs(divisor) - 1 : 0, so the SRA below rounds toward
    // zero.
4107     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4108     AddToWorklist(Srl.getNode());
4109     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4110     AddToWorklist(Add.getNode());
4111     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4112     AddToWorklist(Sra.getNode());
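    // For illustration, with VT = i32 and divisor 8 (C1 = 3, Inexact = 29):
    //   Sign = N0 >>s 31   (0 or -1)
    //   Srl  = Sign >>u 29 (0 or 7)
    //   Add  = N0 + Srl    (biases negative N0 so the SRA rounds toward zero)
    //   Sra  = Add >>s 3   (the quotient)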
4113 
4114     // Special case: (sdiv X, 1) -> X
    // Special case: (sdiv X, -1) -> 0-X
4116     SDValue One = DAG.getConstant(1, DL, VT);
4117     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4118     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4119     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4120     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4121     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4122 
4123     // If dividing by a positive value, we're done. Otherwise, the result must
4124     // be negated.
4125     SDValue Zero = DAG.getConstant(0, DL, VT);
4126     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4127 
4128     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4129     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4130     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4131     return Res;
4132   }
4133 
4134   // If integer divide is expensive and we satisfy the requirements, emit an
4135   // alternate sequence.  Targets may check function attributes for size/speed
4136   // trade-offs.
4137   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4138   if (isConstantOrConstantVector(N1) &&
4139       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4140     if (SDValue Op = BuildSDIV(N))
4141       return Op;
4142 
4143   return SDValue();
4144 }
4145 
4146 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4147   SDValue N0 = N->getOperand(0);
4148   SDValue N1 = N->getOperand(1);
4149   EVT VT = N->getValueType(0);
4150   EVT CCVT = getSetCCResultType(VT);
4151 
4152   // fold vector ops
4153   if (VT.isVector())
4154     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4155       return FoldedVOp;
4156 
4157   SDLoc DL(N);
4158 
4159   // fold (udiv c1, c2) -> c1/c2
4160   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4161   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4162     return C;
4163 
4164   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4165   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4166     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4167                          DAG.getConstant(1, DL, VT),
4168                          DAG.getConstant(0, DL, VT));
4169 
4170   if (SDValue V = simplifyDivRem(N, DAG))
4171     return V;
4172 
4173   if (SDValue NewSel = foldBinOpIntoSelect(N))
4174     return NewSel;
4175 
4176   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4177     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4179     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4180                                               { N0, N1 })) {
4181       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4182       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4183       AddToWorklist(Mul.getNode());
4184       AddToWorklist(Sub.getNode());
4185       CombineTo(RemNode, Sub);
4186     }
4187     return V;
4188   }
4189 
  // udiv, urem -> udivrem
4191   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4192   // true.  Otherwise, we break the simplification logic in visitREM().
4193   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4194   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4195     if (SDValue DivRem = useDivRem(N))
      return DivRem;
4197 
4198   return SDValue();
4199 }
4200 
4201 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4202   SDLoc DL(N);
4203   EVT VT = N->getValueType(0);
4204 
4205   // fold (udiv x, (1 << c)) -> x >>u c
4206   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4207       DAG.isKnownToBeAPowerOfTwo(N1)) {
4208     SDValue LogBase2 = BuildLogBase2(N1, DL);
4209     AddToWorklist(LogBase2.getNode());
4210 
4211     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4212     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4213     AddToWorklist(Trunc.getNode());
4214     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4215   }
4216 
4217   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
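  // For illustration: with c = 4 and i32 x, (udiv x, (shl 4, y)) becomes
  // x >>u (2 + y), since log2(4 << y) == 2 + y.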
4218   if (N1.getOpcode() == ISD::SHL) {
4219     SDValue N10 = N1.getOperand(0);
4220     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4221         DAG.isKnownToBeAPowerOfTwo(N10)) {
4222       SDValue LogBase2 = BuildLogBase2(N10, DL);
4223       AddToWorklist(LogBase2.getNode());
4224 
4225       EVT ADDVT = N1.getOperand(1).getValueType();
4226       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4227       AddToWorklist(Trunc.getNode());
4228       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4229       AddToWorklist(Add.getNode());
4230       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4231     }
4232   }
4233 
4234   // fold (udiv x, c) -> alternate
4235   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4236   if (isConstantOrConstantVector(N1) &&
4237       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4238     if (SDValue Op = BuildUDIV(N))
4239       return Op;
4240 
4241   return SDValue();
4242 }
4243 
4244 // handles ISD::SREM and ISD::UREM
4245 SDValue DAGCombiner::visitREM(SDNode *N) {
4246   unsigned Opcode = N->getOpcode();
4247   SDValue N0 = N->getOperand(0);
4248   SDValue N1 = N->getOperand(1);
4249   EVT VT = N->getValueType(0);
4250   EVT CCVT = getSetCCResultType(VT);
4251 
4252   bool isSigned = (Opcode == ISD::SREM);
4253   SDLoc DL(N);
4254 
4255   // fold (rem c1, c2) -> c1%c2
4256   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4257   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4258     return C;
4259 
  // fold (urem X, -1) -> select(X == -1, 0, X)
4261   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4262     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4263                          DAG.getConstant(0, DL, VT), N0);
4264 
4265   if (SDValue V = simplifyDivRem(N, DAG))
4266     return V;
4267 
4268   if (SDValue NewSel = foldBinOpIntoSelect(N))
4269     return NewSel;
4270 
4271   if (isSigned) {
4272     // If we know the sign bits of both operands are zero, strength reduce to a
4273     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4274     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4275       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4276   } else {
4277     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4278     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4279       // fold (urem x, pow2) -> (and x, pow2-1)
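      // For illustration: (urem x, 8) becomes (and x, 7); the low three bits
      // of x are exactly the remainder mod 8.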
4280       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4281       AddToWorklist(Add.getNode());
4282       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4283     }
4284     if (N1.getOpcode() == ISD::SHL &&
4285         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4286       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4287       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4288       AddToWorklist(Add.getNode());
4289       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4290     }
4291   }
4292 
4293   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4294 
4295   // If X/C can be simplified by the division-by-constant logic, lower
4296   // X%C to the equivalent of X-X/C*C.
4297   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4298   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4299   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4300   // combine will not return a DIVREM.  Regardless, checking cheapness here
4301   // makes sense since the simplification results in fatter code.
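  // For illustration: with X = 29 and C = 8, the optimized divide computes 3,
  // and the remainder is rebuilt as 29 - 3 * 8 = 5, matching 29 % 8.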
4302   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4303     SDValue OptimizedDiv =
4304         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4305     if (OptimizedDiv.getNode()) {
4306       // If the equivalent Div node also exists, update its users.
4307       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4308       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4309                                                 { N0, N1 }))
4310         CombineTo(DivNode, OptimizedDiv);
4311       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4312       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4313       AddToWorklist(OptimizedDiv.getNode());
4314       AddToWorklist(Mul.getNode());
4315       return Sub;
4316     }
4317   }
4318 
  // sdiv, srem -> sdivrem; udiv, urem -> udivrem
4320   if (SDValue DivRem = useDivRem(N))
4321     return DivRem.getValue(1);
4322 
4323   return SDValue();
4324 }
4325 
4326 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4327   SDValue N0 = N->getOperand(0);
4328   SDValue N1 = N->getOperand(1);
4329   EVT VT = N->getValueType(0);
4330   SDLoc DL(N);
4331 
4332   if (VT.isVector()) {
4333     // fold (mulhs x, 0) -> 0
    // do not return N0/N1, because an undef node may exist.
4335     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4336         ISD::isBuildVectorAllZeros(N1.getNode()))
4337       return DAG.getConstant(0, DL, VT);
4338   }
4339 
4340   // fold (mulhs x, 0) -> 0
4341   if (isNullConstant(N1))
4342     return N1;
4343   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4344   if (isOneConstant(N1))
4345     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4346                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4347                                        getShiftAmountTy(N0.getValueType())));
4348 
4349   // fold (mulhs x, undef) -> 0
4350   if (N0.isUndef() || N1.isUndef())
4351     return DAG.getConstant(0, DL, VT);
4352 
  // If the type that is twice as wide is legal, transform the mulhs to a
  // wider multiply plus a shift.
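  // For illustration: an i16 mulhs with a legal i32 multiply becomes
  //   trunc i16 ((sext_i32(x) * sext_i32(y)) >>u 16)
  // because the upper half of the widened product is exactly the mulhs result.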
4355   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
4356       !VT.isVector()) {
4357     MVT Simple = VT.getSimpleVT();
4358     unsigned SimpleSize = Simple.getSizeInBits();
4359     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4360     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4361       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4362       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4363       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4364       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4365             DAG.getConstant(SimpleSize, DL,
4366                             getShiftAmountTy(N1.getValueType())));
4367       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4368     }
4369   }
4370 
4371   return SDValue();
4372 }
4373 
4374 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4375   SDValue N0 = N->getOperand(0);
4376   SDValue N1 = N->getOperand(1);
4377   EVT VT = N->getValueType(0);
4378   SDLoc DL(N);
4379 
4380   if (VT.isVector()) {
4381     // fold (mulhu x, 0) -> 0
    // do not return N0/N1, because an undef node may exist.
4383     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4384         ISD::isBuildVectorAllZeros(N1.getNode()))
4385       return DAG.getConstant(0, DL, VT);
4386   }
4387 
4388   // fold (mulhu x, 0) -> 0
4389   if (isNullConstant(N1))
4390     return N1;
4391   // fold (mulhu x, 1) -> 0
4392   if (isOneConstant(N1))
4393     return DAG.getConstant(0, DL, N0.getValueType());
4394   // fold (mulhu x, undef) -> 0
4395   if (N0.isUndef() || N1.isUndef())
4396     return DAG.getConstant(0, DL, VT);
4397 
4398   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
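  // For illustration: for i32 and c = 20, the high 32 bits of x * (1 << 20)
  // equal x >>u 12, i.e. x >>u (32 - 20).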
4399   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4400       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4401     unsigned NumEltBits = VT.getScalarSizeInBits();
4402     SDValue LogBase2 = BuildLogBase2(N1, DL);
4403     SDValue SRLAmt = DAG.getNode(
4404         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4405     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4406     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4407     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4408   }
4409 
  // If the type that is twice as wide is legal, transform the mulhu to a
  // wider multiply plus a shift.
4412   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
4413       !VT.isVector()) {
4414     MVT Simple = VT.getSimpleVT();
4415     unsigned SimpleSize = Simple.getSizeInBits();
4416     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4417     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4418       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4419       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4420       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4421       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4422             DAG.getConstant(SimpleSize, DL,
4423                             getShiftAmountTy(N1.getValueType())));
4424       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4425     }
4426   }
4427 
4428   return SDValue();
4429 }
4430 
4431 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4432 /// give the opcodes for the two computations that are being performed. Return
/// the simplified value if a simplification was made.
4434 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4435                                                 unsigned HiOp) {
4436   // If the high half is not needed, just compute the low half.
4437   bool HiExists = N->hasAnyUseOfValue(1);
4438   if (!HiExists && (!LegalOperations ||
4439                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4440     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4441     return CombineTo(N, Res, Res);
4442   }
4443 
4444   // If the low half is not needed, just compute the high half.
4445   bool LoExists = N->hasAnyUseOfValue(0);
4446   if (!LoExists && (!LegalOperations ||
4447                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4448     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4449     return CombineTo(N, Res, Res);
4450   }
4451 
  // If both halves are used, leave the node as it is.
4453   if (LoExists && HiExists)
4454     return SDValue();
4455 
4456   // If the two computed results can be simplified separately, separate them.
4457   if (LoExists) {
4458     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4459     AddToWorklist(Lo.getNode());
4460     SDValue LoOpt = combine(Lo.getNode());
4461     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4462         (!LegalOperations ||
4463          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4464       return CombineTo(N, LoOpt, LoOpt);
4465   }
4466 
4467   if (HiExists) {
4468     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4469     AddToWorklist(Hi.getNode());
4470     SDValue HiOpt = combine(Hi.getNode());
4471     if (HiOpt.getNode() && HiOpt != Hi &&
4472         (!LegalOperations ||
4473          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4474       return CombineTo(N, HiOpt, HiOpt);
4475   }
4476 
4477   return SDValue();
4478 }
4479 
4480 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4481   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4482     return Res;
4483 
4484   EVT VT = N->getValueType(0);
4485   SDLoc DL(N);
4486 
  // If the type that is twice as wide is legal, transform the smul_lohi to a
  // wider multiply plus a shift.
4489   if (VT.isSimple() && !VT.isVector()) {
4490     MVT Simple = VT.getSimpleVT();
4491     unsigned SimpleSize = Simple.getSizeInBits();
4492     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4493     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4494       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4495       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4496       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) by shifting the wide product down.
4498       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4499             DAG.getConstant(SimpleSize, DL,
4500                             getShiftAmountTy(Lo.getValueType())));
4501       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) by truncating the wide product.
4503       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4504       return CombineTo(N, Lo, Hi);
4505     }
4506   }
4507 
4508   return SDValue();
4509 }
4510 
4511 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4512   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4513     return Res;
4514 
4515   EVT VT = N->getValueType(0);
4516   SDLoc DL(N);
4517 
4518   // (umul_lohi N0, 0) -> (0, 0)
4519   if (isNullConstant(N->getOperand(1))) {
4520     SDValue Zero = DAG.getConstant(0, DL, VT);
4521     return CombineTo(N, Zero, Zero);
4522   }
4523 
4524   // (umul_lohi N0, 1) -> (N0, 0)
4525   if (isOneConstant(N->getOperand(1))) {
4526     SDValue Zero = DAG.getConstant(0, DL, VT);
4527     return CombineTo(N, N->getOperand(0), Zero);
4528   }
4529 
  // If the type that is twice as wide is legal, transform the umul_lohi to a
  // wider multiply plus a shift.
4532   if (VT.isSimple() && !VT.isVector()) {
4533     MVT Simple = VT.getSimpleVT();
4534     unsigned SimpleSize = Simple.getSizeInBits();
4535     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4536     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4537       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4538       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4539       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part (result 1) by shifting the wide product down.
4541       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4542             DAG.getConstant(SimpleSize, DL,
4543                             getShiftAmountTy(Lo.getValueType())));
4544       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part (result 0) by truncating the wide product.
4546       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4547       return CombineTo(N, Lo, Hi);
4548     }
4549   }
4550 
4551   return SDValue();
4552 }
4553 
4554 SDValue DAGCombiner::visitMULO(SDNode *N) {
4555   SDValue N0 = N->getOperand(0);
4556   SDValue N1 = N->getOperand(1);
4557   EVT VT = N0.getValueType();
4558   bool IsSigned = (ISD::SMULO == N->getOpcode());
4559 
4560   EVT CarryVT = N->getValueType(1);
4561   SDLoc DL(N);
4562 
4563   // canonicalize constant to RHS.
4564   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4565       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4566     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4567 
4568   // fold (mulo x, 0) -> 0 + no carry out
4569   if (isNullOrNullSplat(N1))
4570     return CombineTo(N, DAG.getConstant(0, DL, VT),
4571                      DAG.getConstant(0, DL, CarryVT));
4572 
4573   // (mulo x, 2) -> (addo x, x)
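  // For illustration: x * 2 == x + x, and the multiply overflows exactly when
  // the corresponding signed/unsigned addition does, so the carry result is
  // preserved.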
4574   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4575     if (C2->getAPIntValue() == 2)
4576       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4577                          N->getVTList(), N0, N0);
4578 
4579   return SDValue();
4580 }
4581 
4582 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4583   SDValue N0 = N->getOperand(0);
4584   SDValue N1 = N->getOperand(1);
4585   EVT VT = N0.getValueType();
4586   unsigned Opcode = N->getOpcode();
4587 
4588   // fold vector ops
4589   if (VT.isVector())
4590     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4591       return FoldedVOp;
4592 
4593   // fold operation with constant operands.
4594   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
4595     return C;
4596 
4597   // canonicalize constant to RHS
4598   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4599       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4600     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4601 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4603   // Only do this if the current op isn't legal and the flipped is.
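  // For illustration: when both operands are known non-negative, signed and
  // unsigned orderings agree, so e.g. smax(x, y) == umax(x, y).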
4604   if (!TLI.isOperationLegal(Opcode, VT) &&
4605       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4606       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4607     unsigned AltOpcode;
4608     switch (Opcode) {
4609     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4610     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4611     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4612     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4613     default: llvm_unreachable("Unknown MINMAX opcode");
4614     }
4615     if (TLI.isOperationLegal(AltOpcode, VT))
4616       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4617   }
4618 
4619   return SDValue();
4620 }
4621 
4622 /// If this is a bitwise logic instruction and both operands have the same
4623 /// opcode, try to sink the other opcode after the logic instruction.
4624 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4625   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4626   EVT VT = N0.getValueType();
4627   unsigned LogicOpcode = N->getOpcode();
4628   unsigned HandOpcode = N0.getOpcode();
4629   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4630           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4631   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4632 
4633   // Bail early if none of these transforms apply.
4634   if (N0.getNumOperands() == 0)
4635     return SDValue();
4636 
4637   // FIXME: We should check number of uses of the operands to not increase
4638   //        the instruction count for all transforms.
4639 
4640   // Handle size-changing casts.
4641   SDValue X = N0.getOperand(0);
4642   SDValue Y = N1.getOperand(0);
4643   EVT XVT = X.getValueType();
4644   SDLoc DL(N);
4645   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4646       HandOpcode == ISD::SIGN_EXTEND) {
4647     // If both operands have other uses, this transform would create extra
4648     // instructions without eliminating anything.
4649     if (!N0.hasOneUse() && !N1.hasOneUse())
4650       return SDValue();
4651     // We need matching integer source types.
4652     if (XVT != Y.getValueType())
4653       return SDValue();
4654     // Don't create an illegal op during or after legalization. Don't ever
4655     // create an unsupported vector op.
4656     if ((VT.isVector() || LegalOperations) &&
4657         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4658       return SDValue();
4659     // Avoid infinite looping with PromoteIntBinOp.
4660     // TODO: Should we apply desirable/legal constraints to all opcodes?
4661     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4662         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4663       return SDValue();
4664     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4665     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4666     return DAG.getNode(HandOpcode, DL, VT, Logic);
4667   }
4668 
4669   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4670   if (HandOpcode == ISD::TRUNCATE) {
4671     // If both operands have other uses, this transform would create extra
4672     // instructions without eliminating anything.
4673     if (!N0.hasOneUse() && !N1.hasOneUse())
4674       return SDValue();
4675     // We need matching source types.
4676     if (XVT != Y.getValueType())
4677       return SDValue();
4678     // Don't create an illegal op during or after legalization.
4679     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4680       return SDValue();
4681     // Be extra careful sinking truncate. If it's free, there's no benefit in
4682     // widening a binop. Also, don't create a logic op on an illegal type.
4683     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4684       return SDValue();
4685     if (!TLI.isTypeLegal(XVT))
4686       return SDValue();
4687     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4688     return DAG.getNode(HandOpcode, DL, VT, Logic);
4689   }
4690 
4691   // For binops SHL/SRL/SRA/AND:
4692   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4693   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4694        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4695       N0.getOperand(1) == N1.getOperand(1)) {
4696     // If either operand has other uses, this transform is not an improvement.
4697     if (!N0.hasOneUse() || !N1.hasOneUse())
4698       return SDValue();
4699     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4700     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4701   }
4702 
4703   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4704   if (HandOpcode == ISD::BSWAP) {
4705     // If either operand has other uses, this transform is not an improvement.
4706     if (!N0.hasOneUse() || !N1.hasOneUse())
4707       return SDValue();
4708     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4709     return DAG.getNode(HandOpcode, DL, VT, Logic);
4710   }
4711 
4712   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4713   // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4715   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4716   // we don't want to undo this promotion.
4717   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4718   // on scalars.
4719   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4720        Level <= AfterLegalizeTypes) {
4721     // Input types must be integer and the same.
4722     if (XVT.isInteger() && XVT == Y.getValueType() &&
4723         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4724           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4725       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4726       return DAG.getNode(HandOpcode, DL, VT, Logic);
4727     }
4728   }
4729 
4730   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4731   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4732   // If both shuffles use the same mask, and both shuffle within a single
4733   // vector, then it is worthwhile to move the swizzle after the operation.
4734   // The type-legalizer generates this pattern when loading illegal
4735   // vector types from memory. In many cases this allows additional shuffle
4736   // optimizations.
4737   // There are other cases where moving the shuffle after the xor/and/or
4738   // is profitable even if shuffles don't perform a swizzle.
4739   // If both shuffles use the same mask, and both shuffles have the same first
4740   // or second operand, then it might still be profitable to move the shuffle
4741   // after the xor/and/or operation.
4742   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4743     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4744     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4745     assert(X.getValueType() == Y.getValueType() &&
4746            "Inputs to shuffles are not the same type");
4747 
4748     // Check that both shuffles use the same mask. The masks are known to be of
4749     // the same length because the result vector type is the same.
4750     // Check also that shuffles have only one use to avoid introducing extra
4751     // instructions.
4752     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4753         !SVN0->getMask().equals(SVN1->getMask()))
4754       return SDValue();
4755 
4756     // Don't try to fold this node if it requires introducing a
4757     // build vector of all zeros that might be illegal at this stage.
4758     SDValue ShOp = N0.getOperand(1);
4759     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4760       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4761 
4762     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4763     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4764       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4765                                   N0.getOperand(0), N1.getOperand(0));
4766       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4767     }
4768 
4769     // Don't try to fold this node if it requires introducing a
4770     // build vector of all zeros that might be illegal at this stage.
4771     ShOp = N0.getOperand(0);
4772     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4773       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4774 
4775     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4776     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4777       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4778                                   N1.getOperand(1));
4779       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4780     }
4781   }
4782 
4783   return SDValue();
4784 }
4785 
4786 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4787 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4788                                        const SDLoc &DL) {
4789   SDValue LL, LR, RL, RR, N0CC, N1CC;
4790   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4791       !isSetCCEquivalent(N1, RL, RR, N1CC))
4792     return SDValue();
4793 
4794   assert(N0.getValueType() == N1.getValueType() &&
4795          "Unexpected operand types for bitwise logic op");
4796   assert(LL.getValueType() == LR.getValueType() &&
4797          RL.getValueType() == RR.getValueType() &&
4798          "Unexpected operand types for setcc");
4799 
4800   // If we're here post-legalization or the logic op type is not i1, the logic
4801   // op type must match a setcc result type. Also, all folds require new
4802   // operations on the left and right operands, so those types must match.
4803   EVT VT = N0.getValueType();
4804   EVT OpVT = LL.getValueType();
4805   if (LegalOperations || VT.getScalarType() != MVT::i1)
4806     if (VT != getSetCCResultType(OpVT))
4807       return SDValue();
4808   if (OpVT != RL.getValueType())
4809     return SDValue();
4810 
4811   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4812   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4813   bool IsInteger = OpVT.isInteger();
4814   if (LR == RR && CC0 == CC1 && IsInteger) {
4815     bool IsZero = isNullOrNullSplat(LR);
4816     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4817 
4818     // All bits clear?
4819     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4820     // All sign bits clear?
4821     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4822     // Any bits set?
4823     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4824     // Any sign bits set?
4825     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4826 
4827     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4828     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4829     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4830     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4831     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4832       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4833       AddToWorklist(Or.getNode());
4834       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4835     }
4836 
4837     // All bits set?
4838     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4839     // All sign bits set?
4840     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4841     // Any bits clear?
4842     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4843     // Any sign bits clear?
4844     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4845 
4846     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4847     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4848     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4850     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4851       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4852       AddToWorklist(And.getNode());
4853       return DAG.getSetCC(DL, VT, And, LR, CC1);
4854     }
4855   }
4856 
4857   // TODO: What is the 'or' equivalent of this fold?
4858   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
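  // For illustration: (add X, 1) maps X == -1 to 0 and X == 0 to 1, so X
  // differs from both constants exactly when the sum is >=u 2.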
4859   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4860       IsInteger && CC0 == ISD::SETNE &&
4861       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4862        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4863     SDValue One = DAG.getConstant(1, DL, OpVT);
4864     SDValue Two = DAG.getConstant(2, DL, OpVT);
4865     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4866     AddToWorklist(Add.getNode());
4867     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4868   }
4869 
4870   // Try more general transforms if the predicates match and the only user of
4871   // the compares is the 'and' or 'or'.
4872   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4873       N0.hasOneUse() && N1.hasOneUse()) {
4874     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4875     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4876     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4877       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4878       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4879       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4880       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4881       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4882     }
4883 
4884     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4885     // TODO - support non-uniform vector amounts.
4886     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4887       // Match a shared variable operand and 2 non-opaque constant operands.
4888       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4889       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4890       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4891         // Canonicalize larger constant as C0.
4892         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4893           std::swap(C0, C1);
4894 
4895         // The difference of the constants must be a single bit.
4896         const APInt &C0Val = C0->getAPIntValue();
4897         const APInt &C1Val = C1->getAPIntValue();
4898         if ((C0Val - C1Val).isPowerOf2()) {
4899           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
          // setcc (and (add X, -C1), ~(C0 - C1)), 0, ne/eq
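          // For illustration, with C0 = 5 and C1 = 4 (difference 1):
          //   (X != 5) && (X != 4) --> ((X - 4) & ~1) != 0,
          // since X - 4 lands in {0, 1} exactly when X is 4 or 5.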
4901           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4902           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4903           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4904           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4905           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4906           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4907         }
4908       }
4909     }
4910   }
4911 
4912   // Canonicalize equivalent operands to LL == RL.
4913   if (LL == RR && LR == RL) {
4914     CC1 = ISD::getSetCCSwappedOperands(CC1);
4915     std::swap(RL, RR);
4916   }
4917 
4918   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4919   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4920   if (LL == RL && LR == RR) {
4921     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
4922                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
4923     if (NewCC != ISD::SETCC_INVALID &&
4924         (!LegalOperations ||
4925          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4926           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4927       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4928   }
4929 
4930   return SDValue();
4931 }
4932 
4933 /// This contains all DAGCombine rules which reduce two values combined by
4934 /// an And operation to a single value. This makes them reusable in the context
4935 /// of visitSELECT(). Rules involving constants are not included as
4936 /// visitSELECT() already handles those cases.
4937 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4938   EVT VT = N1.getValueType();
4939   SDLoc DL(N);
4940 
4941   // fold (and x, undef) -> 0
4942   if (N0.isUndef() || N1.isUndef())
4943     return DAG.getConstant(0, DL, VT);
4944 
4945   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4946     return V;
4947 
4948   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4949       VT.getSizeInBits() <= 64) {
4950     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4951       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
        // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
        // immediate for an add, but it becomes legal once its top c2 bits are
        // set (the AND with the lshr clears those bits anyway), transform the
        // ADD so the immediate doesn't need to be materialized in a register.
4956         APInt ADDC = ADDI->getAPIntValue();
4957         APInt SRLC = SRLI->getAPIntValue();
4958         if (ADDC.getMinSignedBits() <= 64 &&
4959             SRLC.ult(VT.getSizeInBits()) &&
4960             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4961           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4962                                              SRLC.getZExtValue());
4963           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4964             ADDC |= Mask;
4965             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4966               SDLoc DL0(N0);
4967               SDValue NewAdd =
4968                 DAG.getNode(ISD::ADD, DL0, VT,
4969                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4970               CombineTo(N0.getNode(), NewAdd);
4971               // Return N so it doesn't get rechecked!
4972               return SDValue(N, 0);
4973             }
4974           }
4975         }
4976       }
4977     }
4978   }
4979 
4980   // Reduce bit extract of low half of an integer to the narrower type.
4981   // (and (srl i64:x, K), KMask) ->
  //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
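  // For illustration: (and (srl i64:x, 8), 0xFF) needs only bits 8..15 of x,
  // which lie entirely in the low 32-bit half, so the same extract can be done
  // on (trunc i64:x to i32) and the result zero-extended back to i64.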
4983   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4984     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4985       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4986         unsigned Size = VT.getSizeInBits();
4987         const APInt &AndMask = CAnd->getAPIntValue();
4988         unsigned ShiftBits = CShift->getZExtValue();
4989 
4990         // Bail out, this node will probably disappear anyway.
4991         if (ShiftBits == 0)
4992           return SDValue();
4993 
4994         unsigned MaskBits = AndMask.countTrailingOnes();
4995         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4996 
4997         if (AndMask.isMask() &&
4998             // Required bits must not span the two halves of the integer and
4999             // must fit in the half size type.
5000             (ShiftBits + MaskBits <= Size / 2) &&
5001             TLI.isNarrowingProfitable(VT, HalfVT) &&
5002             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
5003             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
5004             TLI.isTruncateFree(VT, HalfVT) &&
5005             TLI.isZExtFree(HalfVT, VT)) {
          // The isNarrowingProfitable check is to avoid regressions on PPC and
5007           // AArch64 which match a few 64-bit bit insert / bit extract patterns
5008           // on downstream users of this. Those patterns could probably be
5009           // extended to handle extensions mixed in.
5010 
          SDLoc SL(N0);
5012           assert(MaskBits <= Size);
5013 
5014           // Extracting the highest bit of the low half.
5015           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
5016           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
5017                                       N0.getOperand(0));
5018 
5019           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
5020           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
5021           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
5022           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
5023           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
5024         }
5025       }
5026     }
5027   }
5028 
5029   return SDValue();
5030 }
5031 
5032 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
5033                                    EVT LoadResultTy, EVT &ExtVT) {
5034   if (!AndC->getAPIntValue().isMask())
5035     return false;
5036 
5037   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
5038 
5039   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5040   EVT LoadedVT = LoadN->getMemoryVT();
5041 
5042   if (ExtVT == LoadedVT &&
5043       (!LegalOperations ||
5044        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
5045     // ZEXTLOAD will match without needing to change the size of the value being
5046     // loaded.
5047     return true;
5048   }
5049 
  // Do not change the width of volatile or atomic loads.
5051   if (!LoadN->isSimple())
5052     return false;
5053 
5054   // Do not generate loads of non-round integer types since these can
5055   // be expensive (and would be wrong if the type is not byte sized).
5056   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
5057     return false;
5058 
5059   if (LegalOperations &&
5060       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
5061     return false;
5062 
5063   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
5064     return false;
5065 
5066   return true;
5067 }
5068 
5069 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
5070                                     ISD::LoadExtType ExtType, EVT &MemVT,
5071                                     unsigned ShAmt) {
5072   if (!LDST)
5073     return false;
5074   // Only allow byte offsets.
5075   if (ShAmt % 8)
5076     return false;
5077 
5078   // Do not generate loads of non-round integer types since these can
5079   // be expensive (and would be wrong if the type is not byte sized).
5080   if (!MemVT.isRound())
5081     return false;
5082 
  // Don't change the width of volatile or atomic loads.
5084   if (!LDST->isSimple())
5085     return false;
5086 
5087   EVT LdStMemVT = LDST->getMemoryVT();
5088 
5089   // Bail out when changing the scalable property, since we can't be sure that
5090   // we're actually narrowing here.
5091   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
5092     return false;
5093 
5094   // Verify that we are actually reducing a load width here.
5095   if (LdStMemVT.bitsLT(MemVT))
5096     return false;
5097 
5098   // Ensure that this isn't going to produce an unsupported memory access.
5099   if (ShAmt) {
5100     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
5101     const unsigned ByteShAmt = ShAmt / 8;
5102     const Align LDSTAlign = LDST->getAlign();
5103     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
5104     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
5105                                 LDST->getAddressSpace(), NarrowAlign,
5106                                 LDST->getMemOperand()->getFlags()))
5107       return false;
5108   }
5109 
5110   // It's not possible to generate a constant of extended or untyped type.
5111   EVT PtrType = LDST->getBasePtr().getValueType();
5112   if (PtrType == MVT::Untyped || PtrType.isExtended())
5113     return false;
5114 
5115   if (isa<LoadSDNode>(LDST)) {
5116     LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform a load with multiple uses; this would require adding a
    // new load.
5119     if (!SDValue(Load, 0).hasOneUse())
5120       return false;
5121 
5122     if (LegalOperations &&
5123         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
5124       return false;
5125 
5126     // For the transform to be legal, the load must produce only two values
5127     // (the value loaded and the chain).  Don't transform a pre-increment
5128     // load, for example, which produces an extra value.  Otherwise the
5129     // transformation is not equivalent, and the downstream logic to replace
5130     // uses gets things wrong.
5131     if (Load->getNumValues() > 2)
5132       return false;
5133 
5134     // If the load that we're shrinking is an extload and we're not just
5135     // discarding the extension we can't simply shrink the load. Bail.
5136     // TODO: It would be possible to merge the extensions in some cases.
5137     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
5138         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5139       return false;
5140 
5141     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
5142       return false;
5143   } else {
    assert(isa<StoreSDNode>(LDST) &&
           "It is neither a Load nor a Store SDNode");
5145     StoreSDNode *Store = cast<StoreSDNode>(LDST);
5146     // Can't write outside the original store
5147     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
5148       return false;
5149 
5150     if (LegalOperations &&
5151         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
5152       return false;
5153   }
5154   return true;
5155 }
5156 
5157 bool DAGCombiner::SearchForAndLoads(SDNode *N,
5158                                     SmallVectorImpl<LoadSDNode*> &Loads,
5159                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
5160                                     ConstantSDNode *Mask,
5161                                     SDNode *&NodeToMask) {
5162   // Recursively search for the operands, looking for loads which can be
5163   // narrowed.
5164   for (SDValue Op : N->op_values()) {
5165     if (Op.getValueType().isVector())
5166       return false;
5167 
5168     // Some constants may need fixing up later if they are too large.
5169     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
5170       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
5171           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
5172         NodesWithConsts.insert(N);
5173       continue;
5174     }
5175 
5176     if (!Op.hasOneUse())
5177       return false;
5178 
5179     switch(Op.getOpcode()) {
5180     case ISD::LOAD: {
5181       auto *Load = cast<LoadSDNode>(Op);
5182       EVT ExtVT;
5183       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5184           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5185 
5186         // ZEXTLOAD is already small enough.
5187         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5188             ExtVT.bitsGE(Load->getMemoryVT()))
5189           continue;
5190 
        // Use bitsLE so that equal-sized loads are also converted to zext.
5192         if (ExtVT.bitsLE(Load->getMemoryVT()))
5193           Loads.push_back(Load);
5194 
5195         continue;
5196       }
5197       return false;
5198     }
5199     case ISD::ZERO_EXTEND:
5200     case ISD::AssertZext: {
5201       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5202       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5203       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5204         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5205         Op.getOperand(0).getValueType();
5206 
      // We can accept extending nodes if the mask is wider than or equal in
      // width to the original type.
5209       if (ExtVT.bitsGE(VT))
5210         continue;
5211       break;
5212     }
5213     case ISD::OR:
5214     case ISD::XOR:
5215     case ISD::AND:
5216       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5217                              NodeToMask))
5218         return false;
5219       continue;
5220     }
5221 
    // Allow one node which will be masked along with any loads found.
5223     if (NodeToMask)
5224       return false;
5225 
5226     // Also ensure that the node to be masked only produces one data result.
5227     NodeToMask = Op.getNode();
5228     if (NodeToMask->getNumValues() > 1) {
5229       bool HasValue = false;
5230       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5231         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5232         if (VT != MVT::Glue && VT != MVT::Other) {
5233           if (HasValue) {
5234             NodeToMask = nullptr;
5235             return false;
5236           }
5237           HasValue = true;
5238         }
5239       }
5240       assert(HasValue && "Node to be masked has no data result?");
5241     }
5242   }
5243   return true;
5244 }
5245 
5246 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
5247   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5248   if (!Mask)
5249     return false;
5250 
5251   if (!Mask->getAPIntValue().isMask())
5252     return false;
5253 
5254   // No need to do anything if the and directly uses a load.
5255   if (isa<LoadSDNode>(N->getOperand(0)))
5256     return false;
5257 
5258   SmallVector<LoadSDNode*, 8> Loads;
5259   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5260   SDNode *FixupNode = nullptr;
5261   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5262     if (Loads.size() == 0)
5263       return false;
5264 
5265     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5266     SDValue MaskOp = N->getOperand(1);
5267 
5268     // If it exists, fixup the single node we allow in the tree that needs
5269     // masking.
5270     if (FixupNode) {
5271       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5272       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5273                                 FixupNode->getValueType(0),
5274                                 SDValue(FixupNode, 0), MaskOp);
5275       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
5277         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5278     }
5279 
5280     // Narrow any constants that need it.
5281     for (auto *LogicN : NodesWithConsts) {
5282       SDValue Op0 = LogicN->getOperand(0);
5283       SDValue Op1 = LogicN->getOperand(1);
5284 
5285       if (isa<ConstantSDNode>(Op0))
        std::swap(Op0, Op1);
5287 
5288       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5289                                 Op1, MaskOp);
5290 
5291       DAG.UpdateNodeOperands(LogicN, Op0, And);
5292     }
5293 
5294     // Create narrow loads.
5295     for (auto *Load : Loads) {
5296       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5297       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5298                                 SDValue(Load, 0), MaskOp);
5299       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
5301         And = SDValue(
5302             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5303       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5304       assert(NewLoad &&
5305              "Shouldn't be masking the load if it can't be narrowed");
5306       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5307     }
5308     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5309     return true;
5310   }
5311   return false;
5312 }
5313 
5314 // Unfold
5315 //    x &  (-1 'logical shift' y)
5316 // To
5317 //    (x 'opposite logical shift' y) 'logical shift' y
5318 // if it is better for performance.
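// For illustration: x & (-1 << y) clears the low y bits of x, and
// (x >>u y) << y computes the same value; e.g. x = 0b10111, y = 3 yields
// 0b10000 either way.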
5319 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5320   assert(N->getOpcode() == ISD::AND);
5321 
5322   SDValue N0 = N->getOperand(0);
5323   SDValue N1 = N->getOperand(1);
5324 
  // Do we actually prefer shifts over a mask?
5326   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5327     return SDValue();
5328 
5329   // Try to match  (-1 '[outer] logical shift' y)
5330   unsigned OuterShift;
5331   unsigned InnerShift; // The opposite direction to the OuterShift.
5332   SDValue Y;           // Shift amount.
5333   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5334     if (!M.hasOneUse())
5335       return false;
5336     OuterShift = M->getOpcode();
5337     if (OuterShift == ISD::SHL)
5338       InnerShift = ISD::SRL;
5339     else if (OuterShift == ISD::SRL)
5340       InnerShift = ISD::SHL;
5341     else
5342       return false;
5343     if (!isAllOnesConstant(M->getOperand(0)))
5344       return false;
5345     Y = M->getOperand(1);
5346     return true;
5347   };
5348 
5349   SDValue X;
5350   if (matchMask(N1))
5351     X = N0;
5352   else if (matchMask(N0))
5353     X = N1;
5354   else
5355     return SDValue();
5356 
5357   SDLoc DL(N);
5358   EVT VT = N->getValueType(0);
5359 
5360   //     tmp = x   'opposite logical shift' y
5361   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5362   //     ret = tmp 'logical shift' y
5363   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5364 
5365   return T1;
5366 }
5367 
5368 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5369 /// For a target with a bit test, this is expected to become test + set and save
5370 /// at least 1 instruction.
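/// E.g. for X == 0b1010, testing bit 2: 'and (not (srl X, 2)), 1' yields 1,
/// and so does '(and X, 1 << 2) == 0', since bit 2 of X is clear; the
/// rewritten form exposes the bit-test idiom to the target.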
5371 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5372   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5373 
5374   // This is probably not worthwhile without a supported type.
5375   EVT VT = And->getValueType(0);
5376   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5377   if (!TLI.isTypeLegal(VT))
5378     return SDValue();
5379 
5380   // Look through an optional extension and find a 'not'.
5381   // TODO: Should we favor test+set even without the 'not' op?
5382   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5383   if (Not.getOpcode() == ISD::ANY_EXTEND)
5384     Not = Not.getOperand(0);
5385   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5386     return SDValue();
5387 
  // Look through an optional truncation. The source operand may not be the same
5389   // type as the original 'and', but that is ok because we are masking off
5390   // everything but the low bit.
5391   SDValue Srl = Not.getOperand(0);
5392   if (Srl.getOpcode() == ISD::TRUNCATE)
5393     Srl = Srl.getOperand(0);
5394 
5395   // Match a shift-right by constant.
5396   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5397       !isa<ConstantSDNode>(Srl.getOperand(1)))
5398     return SDValue();
5399 
5400   // We might have looked through casts that make this transform invalid.
5401   // TODO: If the source type is wider than the result type, do the mask and
5402   //       compare in the source type.
5403   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5404   unsigned VTBitWidth = VT.getSizeInBits();
5405   if (ShiftAmt.uge(VTBitWidth))
5406     return SDValue();
5407 
5408   // Turn this into a bit-test pattern using mask op + setcc:
5409   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5410   SDLoc DL(And);
5411   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5412   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5413   SDValue Mask = DAG.getConstant(
5414       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5415   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5416   SDValue Zero = DAG.getConstant(0, DL, VT);
5417   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5418   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5419 }
5420 
5421 SDValue DAGCombiner::visitAND(SDNode *N) {
5422   SDValue N0 = N->getOperand(0);
5423   SDValue N1 = N->getOperand(1);
5424   EVT VT = N1.getValueType();
5425 
5426   // x & x --> x
5427   if (N0 == N1)
5428     return N0;
5429 
5430   // fold vector ops
5431   if (VT.isVector()) {
5432     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5433       return FoldedVOp;
5434 
5435     // fold (and x, 0) -> 0, vector edition
5436     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5437       // do not return N0, because undef node may exist in N0
5438       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5439                              SDLoc(N), N0.getValueType());
5440     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5441       // do not return N1, because undef node may exist in N1
5442       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5443                              SDLoc(N), N1.getValueType());
5444 
5445     // fold (and x, -1) -> x, vector edition
5446     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5447       return N1;
5448     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5449       return N0;
5450 
5451     // fold (and (masked_load) (build_vec (x, ...))) to zext_masked_load
5452     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
5453     auto *BVec = dyn_cast<BuildVectorSDNode>(N1);
5454     if (MLoad && BVec && MLoad->getExtensionType() == ISD::EXTLOAD &&
5455         N0.hasOneUse() && N1.hasOneUse()) {
5456       EVT LoadVT = MLoad->getMemoryVT();
5457       EVT ExtVT = VT;
5458       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
5459         // For this AND to be a zero extension of the masked load the elements
5460         // of the BuildVec must mask the bottom bits of the extended element
        // type.
5462         if (ConstantSDNode *Splat = BVec->getConstantSplatNode()) {
5463           uint64_t ElementSize =
5464               LoadVT.getVectorElementType().getScalarSizeInBits();
5465           if (Splat->getAPIntValue().isMask(ElementSize)) {
5466             return DAG.getMaskedLoad(
5467                 ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
5468                 MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
5469                 LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
5470                 ISD::ZEXTLOAD, MLoad->isExpandingLoad());
5471           }
5472         }
5473       }
5474     }
5475   }
5476 
5477   // fold (and c1, c2) -> c1&c2
5478   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5479   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
5480     return C;
5481 
5482   // canonicalize constant to RHS
5483   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5484       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5485     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5486 
5487   // fold (and x, -1) -> x
5488   if (isAllOnesConstant(N1))
5489     return N0;
5490 
5491   // if (and x, c) is known to be zero, return 0
5492   unsigned BitWidth = VT.getScalarSizeInBits();
5493   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5494                                    APInt::getAllOnesValue(BitWidth)))
5495     return DAG.getConstant(0, SDLoc(N), VT);
5496 
5497   if (SDValue NewSel = foldBinOpIntoSelect(N))
5498     return NewSel;
5499 
5500   // reassociate and
5501   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5502     return RAND;
5503 
5504   // Try to convert a constant mask AND into a shuffle clear mask.
5505   if (VT.isVector())
5506     if (SDValue Shuffle = XformToShuffleWithZero(N))
5507       return Shuffle;
5508 
5509   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
5510     return Combined;
5511 
5512   // fold (and (or x, C), D) -> D if (C & D) == D
5513   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5514     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5515   };
5516   if (N0.getOpcode() == ISD::OR &&
5517       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5518     return N1;
5519   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
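  // E.g. with V : i8 and c == 0xFF, ~c has no bits set within the low 8 bits,
  // so the 'and' only clears the (undefined) extended bits and the whole
  // expression is equivalent to (zero_ext V).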
5520   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5521     SDValue N0Op0 = N0.getOperand(0);
5522     APInt Mask = ~N1C->getAPIntValue();
5523     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5524     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5525       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5526                                  N0.getValueType(), N0Op0);
5527 
5528       // Replace uses of the AND with uses of the Zero extend node.
5529       CombineTo(N, Zext);
5530 
5531       // We actually want to replace all uses of the any_extend with the
5532       // zero_extend, to avoid duplicating things.  This will later cause this
5533       // AND to be folded.
5534       CombineTo(N0.getNode(), Zext);
5535       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5536     }
5537   }
5538 
  // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // The 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
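  // E.g. (and (zextload i8 -> i32 V), 0xFF) is redundant: the upper 24 bits
  // of the load result are already zero, so the 'and' can be removed outright
  // (an extload is first converted to a zextload to make that hold).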
5545   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5546        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5547        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5548        N0.getOperand(0).getResNo() == 0) ||
5549       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5550     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5551                                          N0 : N0.getOperand(0) );
5552 
    // Get the constant (if applicable) that the load result (operand zero) is
    // being ANDed with. This can be a pure constant or a vector splat, in
    // which case we treat the vector as a scalar and use the splat value.
5556     APInt Constant = APInt::getNullValue(1);
5557     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5558       Constant = C->getAPIntValue();
5559     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5560       APInt SplatValue, SplatUndef;
5561       unsigned SplatBitSize;
5562       bool HasAnyUndefs;
5563       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5564                                              SplatBitSize, HasAnyUndefs);
5565       if (IsSplat) {
5566         // Undef bits can contribute to a possible optimisation if set, so
5567         // set them.
5568         SplatValue |= SplatUndef;
5569 
5570         // The splat value may be something like "0x00FFFFFF", which means 0 for
5571         // the first vector value and FF for the rest, repeating. We need a mask
5572         // that will apply equally to all members of the vector, so AND all the
5573         // lanes of the constant together.
5574         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5575 
        // If the splat value has been compressed to a bit length narrower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
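        // E.g. with SplatBitSize == 8, EltBitWidth == 32 and a splat value of
        // 0xAB, the loop below produces 0xAB -> 0xABAB -> 0xABABABAB.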
5579         if (EltBitWidth > SplatBitSize)
5580           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5581                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5582             SplatValue |= SplatValue.shl(SplatBitSize);
5583 
        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is
        // a multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong
        // value.
5586         if ((SplatBitSize % EltBitWidth) == 0) {
5587           Constant = APInt::getAllOnesValue(EltBitWidth);
5588           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5589             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5590         }
5591       }
5592     }
5593 
5594     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5595     // actually legal and isn't going to get expanded, else this is a false
5596     // optimisation.
5597     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5598                                                     Load->getValueType(0),
5599                                                     Load->getMemoryVT());
5600 
5601     // Resize the constant to the same size as the original memory access before
5602     // extension. If it is still the AllOnesValue then this AND is completely
5603     // unneeded.
5604     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5605 
5606     bool B;
5607     switch (Load->getExtensionType()) {
5608     default: B = false; break;
5609     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5610     case ISD::ZEXTLOAD:
5611     case ISD::NON_EXTLOAD: B = true; break;
5612     }
5613 
5614     if (B && Constant.isAllOnesValue()) {
5615       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5616       // preserve semantics once we get rid of the AND.
5617       SDValue NewLoad(Load, 0);
5618 
5619       // Fold the AND away. NewLoad may get replaced immediately.
5620       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5621 
5622       if (Load->getExtensionType() == ISD::EXTLOAD) {
5623         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5624                               Load->getValueType(0), SDLoc(Load),
5625                               Load->getChain(), Load->getBasePtr(),
5626                               Load->getOffset(), Load->getMemoryVT(),
5627                               Load->getMemOperand());
5628         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5629         if (Load->getNumValues() == 3) {
5630           // PRE/POST_INC loads have 3 values.
5631           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5632                            NewLoad.getValue(2) };
5633           CombineTo(Load, To, 3, true);
5634         } else {
5635           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5636         }
5637       }
5638 
5639       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5640     }
5641   }
5642 
5643   // fold (and (masked_gather x)) -> (zext_masked_gather x)
5644   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
5645     EVT MemVT = GN0->getMemoryVT();
5646     EVT ScalarVT = MemVT.getScalarType();
5647 
5648     if (SDValue(GN0, 0).hasOneUse() &&
5649         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
5651       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
5652                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
5653 
5654       SDValue ZExtLoad = DAG.getMaskedGather(
5655           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
5656           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
5657 
5658       CombineTo(N, ZExtLoad);
5659       AddToWorklist(ZExtLoad.getNode());
5660       // Avoid recheck of N.
5661       return SDValue(N, 0);
5662     }
5663   }
5664 
5665   // fold (and (load x), 255) -> (zextload x, i8)
5666   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5667   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5668   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5669                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5670                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5671     if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
                            ? cast<LoadSDNode>(N0.getOperand(0))
                            : cast<LoadSDNode>(N0);
5674       AddToWorklist(N);
5675       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5676       return SDValue(N, 0);
5677     }
5678   }
5679 
5680   if (LegalTypes) {
5681     // Attempt to propagate the AND back up to the leaves which, if they're
5682     // loads, can be combined to narrow loads and the AND node can be removed.
5683     // Perform after legalization so that extend nodes will already be
5684     // combined into the loads.
5685     if (BackwardsPropagateMask(N))
5686       return SDValue(N, 0);
5687   }
5688 
5689   if (SDValue Combined = visitANDLike(N0, N1, N))
5690     return Combined;
5691 
5692   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5693   if (N0.getOpcode() == N1.getOpcode())
5694     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5695       return V;
5696 
5697   // Masking the negated extension of a boolean is just the zero-extended
5698   // boolean:
5699   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5700   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5701   //
5702   // Note: the SimplifyDemandedBits fold below can make an information-losing
5703   // transform, and then we have no way to find this better fold.
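  // E.g. with X == true: zext(X) == 1, (sub 0, 1) == -1 (all ones), and
  // masking with 1 gives 1 == zext(X); with X == false both sides are 0.
  // The sext case works out the same because (sub 0, sext(true)) == 1.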
5704   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5705     if (isNullOrNullSplat(N0.getOperand(0))) {
5706       SDValue SubRHS = N0.getOperand(1);
5707       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5708           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5709         return SubRHS;
5710       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5711           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5712         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5713     }
5714   }
5715 
5716   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5717   // fold (and (sra)) -> (and (srl)) when possible.
5718   if (SimplifyDemandedBits(SDValue(N, 0)))
5719     return SDValue(N, 0);
5720 
5721   // fold (zext_inreg (extload x)) -> (zextload x)
5722   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5723   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5724       (ISD::isEXTLoad(N0.getNode()) ||
5725        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5726     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5727     EVT MemVT = LN0->getMemoryVT();
5728     // If we zero all the possible extended bits, then we can turn this into
5729     // a zextload if we are running before legalize or the operation is legal.
5730     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5731     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5732     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5733     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5734         ((!LegalOperations && LN0->isSimple()) ||
5735          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5736       SDValue ExtLoad =
5737           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5738                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5739       AddToWorklist(N);
5740       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5741       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5742     }
5743   }
5744 
5745   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5746   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5747     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5748                                            N0.getOperand(1), false))
5749       return BSwap;
5750   }
5751 
5752   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5753     return Shifts;
5754 
5755   if (TLI.hasBitTest(N0, N1))
5756     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5757       return V;
5758 
5759   // Recognize the following pattern:
5760   //
5761   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
5762   //
5763   // where bitmask is a mask that clears the upper bits of AndVT. The
5764   // number of bits in bitmask must be a power of two.
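  // E.g. (and (sign_extend i8 X to i32), 0xFF) keeps only the low 8 bits, so
  // the replicated sign bits are discarded and the result is exactly
  // (zero_extend i8 X to i32).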
5765   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
5766     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
5767       return false;
5768 
5769     auto *C = dyn_cast<ConstantSDNode>(RHS);
5770     if (!C)
5771       return false;
5772 
5773     if (!C->getAPIntValue().isMask(
5774             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
5775       return false;
5776 
5777     return true;
5778   };
5779 
5780   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
5781   if (IsAndZeroExtMask(N0, N1))
5782     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
5783 
5784   return SDValue();
5785 }
5786 
5787 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
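/// E.g. for a == 0x00001234 (i32), ((a & 0xff00) >> 8) | ((a & 0xff) << 8)
/// gives 0x3412, which equals (bswap a) >> 16 == 0x34120000 >> 16.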
5788 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5789                                         bool DemandHighBits) {
5790   if (!LegalOperations)
5791     return SDValue();
5792 
5793   EVT VT = N->getValueType(0);
5794   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5795     return SDValue();
5796   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5797     return SDValue();
5798 
5799   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5800   bool LookPassAnd0 = false;
5801   bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
5806   if (N0.getOpcode() == ISD::AND) {
5807     if (!N0.getNode()->hasOneUse())
5808       return SDValue();
5809     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5810     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5811     // This is needed for X86.
5812     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5813                   N01C->getZExtValue() != 0xFFFF))
5814       return SDValue();
5815     N0 = N0.getOperand(0);
5816     LookPassAnd0 = true;
5817   }
5818 
5819   if (N1.getOpcode() == ISD::AND) {
5820     if (!N1.getNode()->hasOneUse())
5821       return SDValue();
5822     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5823     if (!N11C || N11C->getZExtValue() != 0xFF)
5824       return SDValue();
5825     N1 = N1.getOperand(0);
5826     LookPassAnd1 = true;
5827   }
5828 
5829   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5830     std::swap(N0, N1);
5831   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5832     return SDValue();
5833   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5834     return SDValue();
5835 
5836   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5837   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5838   if (!N01C || !N11C)
5839     return SDValue();
5840   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5841     return SDValue();
5842 
5843   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5844   SDValue N00 = N0->getOperand(0);
5845   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5846     if (!N00.getNode()->hasOneUse())
5847       return SDValue();
5848     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5849     if (!N001C || N001C->getZExtValue() != 0xFF)
5850       return SDValue();
5851     N00 = N00.getOperand(0);
5852     LookPassAnd0 = true;
5853   }
5854 
5855   SDValue N10 = N1->getOperand(0);
5856   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5857     if (!N10.getNode()->hasOneUse())
5858       return SDValue();
5859     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5860     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5861     // for X86.
5862     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5863                    N101C->getZExtValue() != 0xFFFF))
5864       return SDValue();
5865     N10 = N10.getOperand(0);
5866     LookPassAnd1 = true;
5867   }
5868 
5869   if (N00 != N10)
5870     return SDValue();
5871 
5872   // Make sure everything beyond the low halfword gets set to zero since the SRL
5873   // 16 will clear the top bits.
5874   unsigned OpSizeInBits = VT.getSizeInBits();
5875   if (DemandHighBits && OpSizeInBits > 16) {
5876     // If the left-shift isn't masked out then the only way this is a bswap is
5877     // if all bits beyond the low 8 are 0. In that case the entire pattern
5878     // reduces to a left shift anyway: leave it for other parts of the combiner.
5879     if (!LookPassAnd0)
5880       return SDValue();
5881 
5882     // However, if the right shift isn't masked out then it might be because
5883     // it's not needed. See if we can spot that too.
5884     if (!LookPassAnd1 &&
5885         !DAG.MaskedValueIsZero(
5886             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5887       return SDValue();
5888   }
5889 
5890   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5891   if (OpSizeInBits > 16) {
5892     SDLoc DL(N);
5893     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5894                       DAG.getConstant(OpSizeInBits - 16, DL,
5895                                       getShiftAmountTy(VT)));
5896   }
5897   return Res;
5898 }
5899 
5900 /// Return true if the specified node is an element that makes up a 32-bit
5901 /// packed halfword byteswap.
5902 /// ((x & 0x000000ff) << 8) |
5903 /// ((x & 0x0000ff00) >> 8) |
5904 /// ((x & 0x00ff0000) << 8) |
5905 /// ((x & 0xff000000) >> 8)
5906 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5907   if (!N.getNode()->hasOneUse())
5908     return false;
5909 
5910   unsigned Opc = N.getOpcode();
5911   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5912     return false;
5913 
5914   SDValue N0 = N.getOperand(0);
5915   unsigned Opc0 = N0.getOpcode();
5916   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5917     return false;
5918 
5919   ConstantSDNode *N1C = nullptr;
5920   // SHL or SRL: look upstream for AND mask operand
5921   if (Opc == ISD::AND)
5922     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5923   else if (Opc0 == ISD::AND)
5924     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5925   if (!N1C)
5926     return false;
5927 
5928   unsigned MaskByteOffset;
5929   switch (N1C->getZExtValue()) {
5930   default:
5931     return false;
5932   case 0xFF:       MaskByteOffset = 0; break;
5933   case 0xFF00:     MaskByteOffset = 1; break;
5934   case 0xFFFF:
5935     // In case demanded bits didn't clear the bits that will be shifted out.
5936     // This is needed for X86.
5937     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5938       MaskByteOffset = 1;
5939       break;
5940     }
5941     return false;
5942   case 0xFF0000:   MaskByteOffset = 2; break;
5943   case 0xFF000000: MaskByteOffset = 3; break;
5944   }
5945 
5946   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5947   if (Opc == ISD::AND) {
5948     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5949       // (x >> 8) & 0xff
5950       // (x >> 8) & 0xff0000
5951       if (Opc0 != ISD::SRL)
5952         return false;
5953       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5954       if (!C || C->getZExtValue() != 8)
5955         return false;
5956     } else {
5957       // (x << 8) & 0xff00
5958       // (x << 8) & 0xff000000
5959       if (Opc0 != ISD::SHL)
5960         return false;
5961       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5962       if (!C || C->getZExtValue() != 8)
5963         return false;
5964     }
5965   } else if (Opc == ISD::SHL) {
5966     // (x & 0xff) << 8
5967     // (x & 0xff0000) << 8
5968     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5969       return false;
5970     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5971     if (!C || C->getZExtValue() != 8)
5972       return false;
5973   } else { // Opc == ISD::SRL
5974     // (x & 0xff00) >> 8
5975     // (x & 0xff000000) >> 8
5976     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5977       return false;
5978     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5979     if (!C || C->getZExtValue() != 8)
5980       return false;
5981   }
5982 
5983   if (Parts[MaskByteOffset])
5984     return false;
5985 
5986   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5987   return true;
5988 }
5989 
5990 // Match 2 elements of a packed halfword bswap.
5991 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5992   if (N.getOpcode() == ISD::OR)
5993     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5994            isBSwapHWordElement(N.getOperand(1), Parts);
5995 
5996   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5997     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5998     if (!C || C->getAPIntValue() != 16)
5999       return false;
6000     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
6001     return true;
6002   }
6003 
6004   return false;
6005 }
6006 
// Match this pattern:
//   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
// And rewrite this to:
//   (rotr (bswap A), 16)
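// E.g. for A == 0x12345678:
//   ((A << 8) & 0xff00ff00) | ((A >> 8) & 0x00ff00ff)
//     == 0x34007800 | 0x00120056 == 0x34127856
//   rotr(bswap(A), 16) == rotr(0x78563412, 16) == 0x34127856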
6011 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
6012                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
6013                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
6014   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
6015          "MatchBSwapHWordOrAndAnd: expecting i32");
6016   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6017     return SDValue();
6018   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
6019     return SDValue();
6020   // TODO: this is too restrictive; lifting this restriction requires more tests
6021   if (!N0->hasOneUse() || !N1->hasOneUse())
6022     return SDValue();
6023   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
6024   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
6025   if (!Mask0 || !Mask1)
6026     return SDValue();
6027   if (Mask0->getAPIntValue() != 0xff00ff00 ||
6028       Mask1->getAPIntValue() != 0x00ff00ff)
6029     return SDValue();
6030   SDValue Shift0 = N0.getOperand(0);
6031   SDValue Shift1 = N1.getOperand(0);
6032   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
6033     return SDValue();
6034   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
6035   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
6036   if (!ShiftAmt0 || !ShiftAmt1)
6037     return SDValue();
6038   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
6039     return SDValue();
6040   if (Shift0.getOperand(0) != Shift1.getOperand(0))
6041     return SDValue();
6042 
6043   SDLoc DL(N);
6044   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
6045   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
6046   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6047 }
6048 
6049 /// Match a 32-bit packed halfword bswap. That is
6050 /// ((x & 0x000000ff) << 8) |
6051 /// ((x & 0x0000ff00) >> 8) |
6052 /// ((x & 0x00ff0000) << 8) |
6053 /// ((x & 0xff000000) >> 8)
6054 /// => (rotl (bswap x), 16)
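/// E.g. x == 0x11223344 becomes 0x22114433, which equals
/// rotl(bswap(x), 16) == rotl(0x44332211, 16) == 0x22114433.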
6055 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
6056   if (!LegalOperations)
6057     return SDValue();
6058 
6059   EVT VT = N->getValueType(0);
6060   if (VT != MVT::i32)
6061     return SDValue();
6062   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
6063     return SDValue();
6064 
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;

  // Try again with commuted operands.
  if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
                                              getShiftAmountTy(VT)))
    return BSwap;
6074 
6075   // Look for either
6076   // (or (bswaphpair), (bswaphpair))
6077   // (or (or (bswaphpair), (and)), (and))
6078   // (or (or (and), (bswaphpair)), (and))
6079   SDNode *Parts[4] = {};
6080 
6081   if (isBSwapHWordPair(N0, Parts)) {
6082     // (or (or (and), (and)), (or (and), (and)))
6083     if (!isBSwapHWordPair(N1, Parts))
6084       return SDValue();
6085   } else if (N0.getOpcode() == ISD::OR) {
6086     // (or (or (or (and), (and)), (and)), (and))
6087     if (!isBSwapHWordElement(N1, Parts))
6088       return SDValue();
6089     SDValue N00 = N0.getOperand(0);
6090     SDValue N01 = N0.getOperand(1);
6091     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
6092         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
6093       return SDValue();
6094   } else
6095     return SDValue();
6096 
6097   // Make sure the parts are all coming from the same node.
6098   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
6099     return SDValue();
6100 
6101   SDLoc DL(N);
6102   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
6103                               SDValue(Parts[0], 0));
6104 
6105   // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
6107   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
6108   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
6109     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
6110   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
6111     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
6112   return DAG.getNode(ISD::OR, DL, VT,
6113                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
6114                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
6115 }
6116 
6117 /// This contains all DAGCombine rules which reduce two values combined by
6118 /// an Or operation to a single value \see visitANDLike().
6119 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
6120   EVT VT = N1.getValueType();
6121   SDLoc DL(N);
6122 
6123   // fold (or x, undef) -> -1
6124   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
6125     return DAG.getAllOnesConstant(DL, VT);
6126 
6127   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
6128     return V;
6129 
6130   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
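  // E.g. with C1 == 0x0F and C2 == 0xF0, if bits 4-7 of X and bits 0-3 of Y
  // are known zero, then
  //   (or (and X, 0x0F), (and Y, 0xF0)) == (and (or X, Y), 0xFF).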
6131   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
6132       // Don't increase # computations.
6133       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6134     // We can only do this xform if we know that bits from X that are set in C2
6135     // but not in C1 are already zero.  Likewise for Y.
6136     if (const ConstantSDNode *N0O1C =
6137         getAsNonOpaqueConstant(N0.getOperand(1))) {
6138       if (const ConstantSDNode *N1O1C =
6139           getAsNonOpaqueConstant(N1.getOperand(1))) {
6140         // We can only do this xform if we know that bits from X that are set in
6141         // C2 but not in C1 are already zero.  Likewise for Y.
6142         const APInt &LHSMask = N0O1C->getAPIntValue();
6143         const APInt &RHSMask = N1O1C->getAPIntValue();
6144 
6145         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
6146             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
6147           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6148                                   N0.getOperand(0), N1.getOperand(0));
6149           return DAG.getNode(ISD::AND, DL, VT, X,
6150                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
6151         }
6152       }
6153     }
6154   }
6155 
6156   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
6157   if (N0.getOpcode() == ISD::AND &&
6158       N1.getOpcode() == ISD::AND &&
6159       N0.getOperand(0) == N1.getOperand(0) &&
6160       // Don't increase # computations.
6161       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
6162     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
6163                             N0.getOperand(1), N1.getOperand(1));
6164     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
6165   }
6166 
6167   return SDValue();
6168 }
6169 
6170 /// OR combines for which the commuted variant will be tried as well.
6171 static SDValue visitORCommutative(
6172     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
6173   EVT VT = N0.getValueType();
6174   if (N0.getOpcode() == ISD::AND) {
6175     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
6176     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
6177       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
6178 
6179     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
6180     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
6181       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
6182   }
6183 
6184   return SDValue();
6185 }
6186 
6187 SDValue DAGCombiner::visitOR(SDNode *N) {
6188   SDValue N0 = N->getOperand(0);
6189   SDValue N1 = N->getOperand(1);
6190   EVT VT = N1.getValueType();
6191 
6192   // x | x --> x
6193   if (N0 == N1)
6194     return N0;
6195 
6196   // fold vector ops
6197   if (VT.isVector()) {
6198     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6199       return FoldedVOp;
6200 
6201     // fold (or x, 0) -> x, vector edition
6202     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6203       return N1;
6204     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6205       return N0;
6206 
6207     // fold (or x, -1) -> -1, vector edition
6208     if (ISD::isBuildVectorAllOnes(N0.getNode()))
6209       // do not return N0, because undef node may exist in N0
6210       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
6211     if (ISD::isBuildVectorAllOnes(N1.getNode()))
6212       // do not return N1, because undef node may exist in N1
6213       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
6214 
6215     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
6216     // Do this only if the resulting shuffle is legal.
6217     if (isa<ShuffleVectorSDNode>(N0) &&
6218         isa<ShuffleVectorSDNode>(N1) &&
6219         // Avoid folding a node with illegal type.
6220         TLI.isTypeLegal(VT)) {
6221       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
6222       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
6223       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
6224       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
6225       // Ensure both shuffles have a zero input.
6226       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
6227         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
6228         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
6229         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
6230         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
6231         bool CanFold = true;
6232         int NumElts = VT.getVectorNumElements();
6233         SmallVector<int, 4> Mask(NumElts);
6234 
6235         for (int i = 0; i != NumElts; ++i) {
6236           int M0 = SV0->getMaskElt(i);
6237           int M1 = SV1->getMaskElt(i);
6238 
6239           // Determine if either index is pointing to a zero vector.
6240           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
6241           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
6242 
          // If one element is zero and the other side is undef, keep undef.
6244           // This also handles the case that both are undef.
6245           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
6246             Mask[i] = -1;
6247             continue;
6248           }
6249 
6250           // Make sure only one of the elements is zero.
6251           if (M0Zero == M1Zero) {
6252             CanFold = false;
6253             break;
6254           }
6255 
6256           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
6257 
6258           // We have a zero and non-zero element. If the non-zero came from
6259           // SV0 make the index a LHS index. If it came from SV1, make it
6260           // a RHS index. We need to mod by NumElts because we don't care
6261           // which operand it came from in the original shuffles.
6262           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
6263         }
6264 
6265         if (CanFold) {
6266           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
6267           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
6268 
6269           SDValue LegalShuffle =
6270               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
6271                                           Mask, DAG);
6272           if (LegalShuffle)
6273             return LegalShuffle;
6274         }
6275       }
6276     }
6277   }
6278 
6279   // fold (or c1, c2) -> c1|c2
6280   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
6281   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
6282     return C;
6283 
6284   // canonicalize constant to RHS
6285   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6286      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6287     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
6288 
6289   // fold (or x, 0) -> x
6290   if (isNullConstant(N1))
6291     return N0;
6292 
6293   // fold (or x, -1) -> -1
6294   if (isAllOnesConstant(N1))
6295     return N1;
6296 
6297   if (SDValue NewSel = foldBinOpIntoSelect(N))
6298     return NewSel;
6299 
6300   // fold (or x, c) -> c iff (x & ~c) == 0
6301   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
6302     return N1;
6303 
6304   if (SDValue Combined = visitORLike(N0, N1, N))
6305     return Combined;
6306 
6307   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
6308     return Combined;
6309 
6310   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6311   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6312     return BSwap;
6313   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6314     return BSwap;
6315 
6316   // reassociate or
6317   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6318     return ROR;
6319 
6320   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6321   // iff (c1 & c2) != 0 or c1/c2 are undef.
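  // E.g. (or (and X, 0x0F), 0x03) == (and (or X, 0x03), 0x0F): bits where c1
  // and c2 overlap are forced to 1 on both sides, bits only in c1 pass X
  // through, and all other bits are cleared.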
6322   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6323     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6324   };
6325   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6326       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6327     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
6328                                                  {N1, N0.getOperand(1)})) {
6329       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6330       AddToWorklist(IOR.getNode());
6331       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6332     }
6333   }
6334 
6335   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6336     return Combined;
6337   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6338     return Combined;
6339 
6340   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6341   if (N0.getOpcode() == N1.getOpcode())
6342     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6343       return V;
6344 
6345   // See if this is some rotate idiom.
6346   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
6347     return Rot;
6348 
6349   if (SDValue Load = MatchLoadCombine(N))
6350     return Load;
6351 
6352   // Simplify the operands using demanded-bits information.
6353   if (SimplifyDemandedBits(SDValue(N, 0)))
6354     return SDValue(N, 0);
6355 
6356   // If OR can be rewritten into ADD, try combines based on ADD.
6357   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6358       DAG.haveNoCommonBitsSet(N0, N1))
6359     if (SDValue Combined = visitADDLike(N))
6360       return Combined;
6361 
6362   return SDValue();
6363 }
6364 
6365 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6366   if (Op.getOpcode() == ISD::AND &&
6367       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
6368     Mask = Op.getOperand(1);
6369     return Op.getOperand(0);
6370   }
6371   return Op;
6372 }
6373 
6374 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
6375 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6376                             SDValue &Mask) {
6377   Op = stripConstantMask(DAG, Op, Mask);
6378   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
6379     Shift = Op;
6380     return true;
6381   }
6382   return false;
6383 }
6384 
6385 /// Helper function for visitOR to extract the needed side of a rotate idiom
6386 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
6387 /// InstCombine merged some outside op with one of the shifts from
6388 /// the rotate pattern.
6389 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
6390 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
6391 /// patterns:
6392 ///
6393 ///   (or (add v v) (shrl v bitwidth-1)):
6394 ///     expands (add v v) -> (shl v 1)
6395 ///
6396 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6397 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6398 ///
6399 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6400 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6401 ///
6402 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6403 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6404 ///
6405 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6406 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6407 ///
6408 /// Such that in all cases, c3+c2==bitwidth(op v c1).
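/// E.g. on an i8 value v, (or (mul v, c0=24), (shrl (mul v, c1=3), c2=5))
/// needs c3 == 8 - 5 == 3, and indeed (mul v, 24) expands to
/// (shl (mul v, 3), 3) because 24 == 3 << 3.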
6409 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6410                                      SDValue ExtractFrom, SDValue &Mask,
6411                                      const SDLoc &DL) {
6412   assert(OppShift && ExtractFrom && "Empty SDValue");
6413   assert(
6414       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6415       "Existing shift must be valid as a rotate half");
6416 
6417   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
6418 
6419   // Value and Type of the shift.
6420   SDValue OppShiftLHS = OppShift.getOperand(0);
6421   EVT ShiftedVT = OppShiftLHS.getValueType();
6422 
6423   // Amount of the existing shift.
6424   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6425 
6426   // (add v v) -> (shl v 1)
6427   // TODO: Should this be a general DAG canonicalization?
6428   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6429       ExtractFrom.getOpcode() == ISD::ADD &&
6430       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6431       ExtractFrom.getOperand(0) == OppShiftLHS &&
6432       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6433     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6434                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6435 
6436   // Preconditions:
6437   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6438   //
6439   // Find opcode of the needed shift to be extracted from (op0 v c0).
6440   unsigned Opcode = ISD::DELETED_NODE;
6441   bool IsMulOrDiv = false;
6442   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6443   // opcode or its arithmetic (mul or udiv) variant.
6444   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6445     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6446     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6447       return false;
6448     Opcode = NeededShift;
6449     return true;
6450   };
6451   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6452   // that the needed shift can be extracted from.
6453   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6454       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6455     return SDValue();
6456 
6457   // op0 must be the same opcode on both sides, have the same LHS argument,
6458   // and produce the same value type.
6459   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6460       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6461       ShiftedVT != ExtractFrom.getValueType())
6462     return SDValue();
6463 
6464   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
6465   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6466   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
6467   ConstantSDNode *ExtractFromCst =
6468       isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
6470   // Check that we have constant values.
6471   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6472       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6473       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6474     return SDValue();
6475 
6476   // Compute the shift amount we need to extract to complete the rotate.
6477   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6478   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6479     return SDValue();
6480   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6481   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6482   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6483   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6484   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6485 
6486   // Now try extract the needed shift from the ExtractFrom op and see if the
6487   // result matches up with the existing shift's LHS op.
6488   if (IsMulOrDiv) {
6489     // Op to extract from is a mul or udiv by a constant.
6490     // Check:
6491     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
6492     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
6493     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6494                                                  NeededShiftAmt.getZExtValue());
6495     APInt ResultAmt;
6496     APInt Rem;
6497     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6498     if (Rem != 0 || ResultAmt != OppLHSAmt)
6499       return SDValue();
6500   } else {
6501     // Op to extract from is a shift by a constant.
6502     // Check:
6503     //      c2 - (bitwidth(op0 v c0) - c1) == c0
6504     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6505                                           ExtractFromAmt.getBitWidth()))
6506       return SDValue();
6507   }
6508 
6509   // Return the expanded shift op that should allow a rotate to be formed.
6510   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6511   EVT ResVT = ExtractFrom.getValueType();
6512   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6513   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6514 }
6515 
6516 // Return true if we can prove that, whenever Neg and Pos are both in the
6517 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6518 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
6519 //
6520 //     (or (shift1 X, Neg), (shift2 X, Pos))
6521 //
6522 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6523 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6524 // to consider shift amounts with defined behavior.
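// E.g. for EltSize == 32, Neg == (sub 32, Pos) satisfies this: with Pos == 8,
// Neg == 24 and a rotate left by 8 equals a rotate right by 24. The masked
// form Neg == (and (sub 0, Pos), 31) qualifies too, since 0 == 32 (mod 32).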
6525 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6526                            SelectionDAG &DAG) {
6527   // If EltSize is a power of 2 then:
6528   //
6529   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6530   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6531   //
6532   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6533   // for the stronger condition:
6534   //
6535   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6536   //
6537   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6538   // we can just replace Neg with Neg' for the rest of the function.
6539   //
6540   // In other cases we check for the even stronger condition:
6541   //
6542   //     Neg == EltSize - Pos                                    [B]
6543   //
6544   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6545   // behavior if Pos == 0 (and consequently Neg == EltSize).
6546   //
6547   // We could actually use [A] whenever EltSize is a power of 2, but the
6548   // only extra cases that it would match are those uninteresting ones
6549   // where Neg and Pos are never in range at the same time.  E.g. for
6550   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6551   // as well as (sub 32, Pos), but:
6552   //
6553   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6554   //
6555   // always invokes undefined behavior for 32-bit X.
6556   //
6557   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6558   unsigned MaskLoBits = 0;
6559   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6560     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6561       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6562       unsigned Bits = Log2_64(EltSize);
6563       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6564           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6565         Neg = Neg.getOperand(0);
6566         MaskLoBits = Bits;
6567       }
6568     }
6569   }
6570 
6571   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6572   if (Neg.getOpcode() != ISD::SUB)
6573     return false;
6574   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6575   if (!NegC)
6576     return false;
6577   SDValue NegOp1 = Neg.getOperand(1);
6578 
6579   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6580   // Pos'.  The truncation is redundant for the purpose of the equality.
6581   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6582     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6583       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6584       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6585           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6586            MaskLoBits))
6587         Pos = Pos.getOperand(0);
6588     }
6589   }
6590 
6591   // The condition we need is now:
6592   //
6593   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6594   //
6595   // If NegOp1 == Pos then we need:
6596   //
6597   //              EltSize & Mask == NegC & Mask
6598   //
6599   // (because "x & Mask" is a truncation and distributes through subtraction).
6600   //
6601   // We also need to account for a potential truncation of NegOp1 if the amount
6602   // has already been legalized to a shift amount type.
6603   APInt Width;
6604   if ((Pos == NegOp1) ||
6605       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
6606     Width = NegC->getAPIntValue();
6607 
6608   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6609   // Then the condition we want to prove becomes:
6610   //
6611   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6612   //
6613   // which, again because "x & Mask" is a truncation, becomes:
6614   //
6615   //                NegC & Mask == (EltSize - PosC) & Mask
6616   //             EltSize & Mask == (NegC + PosC) & Mask
6617   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6618     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6619       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6620     else
6621       return false;
6622   } else
6623     return false;
6624 
6625   // Now we just need to check that EltSize & Mask == Width & Mask.
6626   if (MaskLoBits)
6627     // EltSize & Mask is 0 since Mask is EltSize - 1.
6628     return Width.getLoBits(MaskLoBits) == 0;
6629   return Width == EltSize;
6630 }
6631 
6632 // A subroutine of MatchRotate used once we have found an OR of two opposite
6633 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6634 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6635 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6636 // Neg with outer conversions stripped away.
6637 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6638                                        SDValue Neg, SDValue InnerPos,
6639                                        SDValue InnerNeg, unsigned PosOpcode,
6640                                        unsigned NegOpcode, const SDLoc &DL) {
6641   // fold (or (shl x, (*ext y)),
6642   //          (srl x, (*ext (sub 32, y)))) ->
6643   //   (rotl x, y) or (rotr x, (sub 32, y))
6644   //
6645   // fold (or (shl x, (*ext (sub 32, y))),
6646   //          (srl x, (*ext y))) ->
6647   //   (rotr x, y) or (rotl x, (sub 32, y))
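  //
  // For instance, if only ROTL is legal for i32:
  //   (or (shl x, (zext y)), (srl x, (zext (sub 32, y)))) -> (rotl x, (zext y))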
6648   EVT VT = Shifted.getValueType();
6649   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6650     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6651     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6652                        HasPos ? Pos : Neg);
6653   }
6654 
6655   return SDValue();
6656 }
6657 
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 and N1.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
6664 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
6665                                        SDValue Neg, SDValue InnerPos,
6666                                        SDValue InnerNeg, unsigned PosOpcode,
6667                                        unsigned NegOpcode, const SDLoc &DL) {
6668   EVT VT = N0.getValueType();
6669   unsigned EltBits = VT.getScalarSizeInBits();
6670 
6671   // fold (or (shl x0, (*ext y)),
6672   //          (srl x1, (*ext (sub 32, y)))) ->
6673   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
6674   //
6675   // fold (or (shl x0, (*ext (sub 32, y))),
6676   //          (srl x1, (*ext y))) ->
6677   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6678   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6679     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6680     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
6681                        HasPos ? Pos : Neg);
6682   }
6683 
  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
  // TODO: When can we use the NegOpcode case?
6687   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
6688     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
6689       if (Op.getOpcode() != BinOpc)
6690         return false;
6691       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
6692       return Cst && (Cst->getAPIntValue() == Imm);
6693     };
6694 
6695     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
6696     //   -> (fshl x0, x1, y)
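    // This is sound because, EltBits being a power of two, (xor y, EltBits -
    // 1) == EltBits - 1 - y for 0 <= y < EltBits, so x1 is shifted right by
    // 1 + (EltBits - 1 - y) == EltBits - y bits in total, matching the fshl
    // semantics; the (srl x1, 1) pre-shift also keeps the total shift amount
    // in range when y == 0.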
6697     if (IsBinOpImm(N1, ISD::SRL, 1) &&
6698         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
6699         InnerPos == InnerNeg.getOperand(0) &&
6700         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
6701       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
6702     }
6703 
6704     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
6705     //   -> (fshr x0, x1, y)
6706     if (IsBinOpImm(N0, ISD::SHL, 1) &&
6707         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6708         InnerNeg == InnerPos.getOperand(0) &&
6709         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6710       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6711     }
6712 
6713     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
6714     //   -> (fshr x0, x1, y)
6715     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
6716     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
6717         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
6718         InnerNeg == InnerPos.getOperand(0) &&
6719         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
6720       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
6721     }
6722   }
6723 
6724   return SDValue();
6725 }
6726 
6727 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6728 // idioms for rotate, and if the target supports rotation instructions, generate
6729 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
6730 // with different shifted sources.
6731 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type. Expanded and promoted types won't work with rotates.
6733   EVT VT = LHS.getValueType();
6734   if (!TLI.isTypeLegal(VT))
6735     return SDValue();
6736 
6737   // The target must have at least one rotate/funnel flavor.
6738   bool HasROTL = hasOperation(ISD::ROTL, VT);
6739   bool HasROTR = hasOperation(ISD::ROTR, VT);
6740   bool HasFSHL = hasOperation(ISD::FSHL, VT);
6741   bool HasFSHR = hasOperation(ISD::FSHR, VT);
6742   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
6743     return SDValue();
6744 
6745   // Check for truncated rotate.
6746   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6747       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6748     assert(LHS.getValueType() == RHS.getValueType());
6749     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6750       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6751     }
6752   }
6753 
6754   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6755   SDValue LHSShift;   // The shift.
6756   SDValue LHSMask;    // AND value if any.
6757   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6758 
6759   SDValue RHSShift;   // The shift.
6760   SDValue RHSMask;    // AND value if any.
6761   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6762 
6763   // If neither side matched a rotate half, bail
6764   if (!LHSShift && !RHSShift)
6765     return SDValue();
6766 
  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
  // single one).
6774 
6775   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6776   if (LHSShift)
6777     if (SDValue NewRHSShift =
6778             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6779       RHSShift = NewRHSShift;
6780   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6781   if (RHSShift)
6782     if (SDValue NewLHSShift =
6783             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6784       LHSShift = NewLHSShift;
6785 
6786   // If a side is still missing, nothing else we can do.
6787   if (!RHSShift || !LHSShift)
6788     return SDValue();
6789 
6790   // At this point we've matched or extracted a shift op on each side.
6791 
6792   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6793     return SDValue(); // Shifts must disagree.
6794 
6795   bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
6796   if (!IsRotate && !(HasFSHL || HasFSHR))
6797     return SDValue(); // Requires funnel shift support.
6798 
6799   // Canonicalize shl to left side in a shl/srl pair.
6800   if (RHSShift.getOpcode() == ISD::SHL) {
6801     std::swap(LHS, RHS);
6802     std::swap(LHSShift, RHSShift);
6803     std::swap(LHSMask, RHSMask);
6804   }
6805 
6806   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6807   SDValue LHSShiftArg = LHSShift.getOperand(0);
6808   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6809   SDValue RHSShiftArg = RHSShift.getOperand(0);
6810   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6811 
6812   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6813   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6814   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
6815   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
6816   // iff C1+C2 == EltSizeInBits
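  // e.g. for i32: (or (shl x, 5), (srl x, 27)) -> (rotl x, 5) or (rotr x, 27).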
6817   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6818                                         ConstantSDNode *RHS) {
6819     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6820   };
6821   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6822     SDValue Res;
6823     if (IsRotate && (HasROTL || HasROTR))
6824       Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
6825                         HasROTL ? LHSShiftAmt : RHSShiftAmt);
6826     else
6827       Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
6828                         RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
6829 
6830     // If there is an AND of either shifted operand, apply it to the result.
6831     if (LHSMask.getNode() || RHSMask.getNode()) {
6832       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6833       SDValue Mask = AllOnes;
6834 
6835       if (LHSMask.getNode()) {
6836         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6837         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6838                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6839       }
6840       if (RHSMask.getNode()) {
6841         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6842         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6843                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6844       }
6845 
6846       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
6847     }
6848 
6849     return Res;
6850   }
6851 
  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the correct bits.
6854   if (LHSMask.getNode() || RHSMask.getNode())
6855     return SDValue();
6856 
6857   // If the shift amount is sign/zext/any-extended just peel it off.
6858   SDValue LExtOp0 = LHSShiftAmt;
6859   SDValue RExtOp0 = RHSShiftAmt;
6860   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6861        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6862        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6863        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6864       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6865        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6866        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6867        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6868     LExtOp0 = LHSShiftAmt.getOperand(0);
6869     RExtOp0 = RHSShiftAmt.getOperand(0);
6870   }
6871 
6872   if (IsRotate && (HasROTL || HasROTR)) {
6873     SDValue TryL =
6874         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
6875                           RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6876     if (TryL)
6877       return TryL;
6878 
6879     SDValue TryR =
6880         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
6881                           LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6882     if (TryR)
6883       return TryR;
6884   }
6885 
6886   SDValue TryL =
6887       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6888                         LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL);
6889   if (TryL)
6890     return TryL;
6891 
6892   SDValue TryR =
6893       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6894                         RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL);
6895   if (TryR)
6896     return TryR;
6897 
6898   return SDValue();
6899 }
6900 
6901 namespace {
6902 
/// Represents the known origin of an individual byte in a load combine
/// pattern. The value of the byte is either constant zero or comes from memory.
6905 struct ByteProvider {
6906   // For constant zero providers Load is set to nullptr. For memory providers
6907   // Load represents the node which loads the byte from memory.
6908   // ByteOffset is the offset of the byte in the value produced by the load.
6909   LoadSDNode *Load = nullptr;
6910   unsigned ByteOffset = 0;
6911 
6912   ByteProvider() = default;
6913 
6914   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6915     return ByteProvider(Load, ByteOffset);
6916   }
6917 
6918   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6919 
6920   bool isConstantZero() const { return !Load; }
6921   bool isMemory() const { return Load; }
6922 
6923   bool operator==(const ByteProvider &Other) const {
6924     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6925   }
6926 
6927 private:
6928   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6929       : Load(Load), ByteOffset(ByteOffset) {}
6930 };
6931 
6932 } // end anonymous namespace
6933 
/// Recursively traverses the expression, calculating the origin of the
/// requested byte of the given value. Returns None if the provider can't be
/// calculated.
///
/// For every value except the root of the expression, verifies that the value
/// has exactly one use; if that does not hold, returns None. This way, if the
/// origin of the byte is returned, it's guaranteed that the values which
/// contribute to the byte are not used outside of this expression.
///
/// Because the parts of the expression are not allowed to have more than one
/// use, this function iterates over trees, not DAGs, so it never visits the
/// same node more than once.
6945 static const Optional<ByteProvider>
6946 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6947                       bool Root = false) {
  // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
6949   if (Depth == 10)
6950     return None;
6951 
6952   if (!Root && !Op.hasOneUse())
6953     return None;
6954 
6955   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6956   unsigned BitWidth = Op.getValueSizeInBits();
6957   if (BitWidth % 8 != 0)
6958     return None;
6959   unsigned ByteWidth = BitWidth / 8;
6960   assert(Index < ByteWidth && "invalid index requested");
6961   (void) ByteWidth;
6962 
6963   switch (Op.getOpcode()) {
6964   case ISD::OR: {
6965     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6966     if (!LHS)
6967       return None;
6968     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6969     if (!RHS)
6970       return None;
6971 
6972     if (LHS->isConstantZero())
6973       return RHS;
6974     if (RHS->isConstantZero())
6975       return LHS;
6976     return None;
6977   }
6978   case ISD::SHL: {
6979     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6980     if (!ShiftOp)
6981       return None;
6982 
6983     uint64_t BitShift = ShiftOp->getZExtValue();
6984     if (BitShift % 8 != 0)
6985       return None;
6986     uint64_t ByteShift = BitShift / 8;
6987 
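    // E.g. byte 2 of (shl x, 16) is byte 0 of x, while bytes 0 and 1 are
    // known to be zero (shifted in from the right).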
6988     return Index < ByteShift
6989                ? ByteProvider::getConstantZero()
6990                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6991                                        Depth + 1);
6992   }
6993   case ISD::ANY_EXTEND:
6994   case ISD::SIGN_EXTEND:
6995   case ISD::ZERO_EXTEND: {
6996     SDValue NarrowOp = Op->getOperand(0);
6997     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6998     if (NarrowBitWidth % 8 != 0)
6999       return None;
7000     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7001 
7002     if (Index >= NarrowByteWidth)
7003       return Op.getOpcode() == ISD::ZERO_EXTEND
7004                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7005                  : None;
7006     return calculateByteProvider(NarrowOp, Index, Depth + 1);
7007   }
7008   case ISD::BSWAP:
7009     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
7010                                  Depth + 1);
7011   case ISD::LOAD: {
7012     auto L = cast<LoadSDNode>(Op.getNode());
7013     if (!L->isSimple() || L->isIndexed())
7014       return None;
7015 
7016     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
7017     if (NarrowBitWidth % 8 != 0)
7018       return None;
7019     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
7020 
7021     if (Index >= NarrowByteWidth)
7022       return L->getExtensionType() == ISD::ZEXTLOAD
7023                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
7024                  : None;
7025     return ByteProvider::getMemory(L, Index);
7026   }
7027   }
7028 
7029   return None;
7030 }
7031 
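// Map the index of a byte within a value to its offset within memory for each
// endianness, e.g. with BW == 4, byte i == 1 of the value sits at memory
// offset 1 in a little-endian layout but at offset 2 (== 4 - 1 - 1) in a
// big-endian layout.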
static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}

static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}
7039 
// Check if the byte offsets we are looking at match either a big- or
// little-endian value load. Return true for big endian, false for little
// endian, and None if the match failed.
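// For example, ByteOffsets == {0, 1, 2, 3} with FirstOffset == 0 matches a
// little-endian layout and returns false, {3, 2, 1, 0} matches big endian and
// returns true, and a mixed order such as {0, 2, 1, 3} returns None.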
7043 static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
7044                                   int64_t FirstOffset) {
7045   // The endian can be decided only when it is 2 bytes at least.
7046   unsigned Width = ByteOffsets.size();
7047   if (Width < 2)
7048     return None;
7049 
7050   bool BigEndian = true, LittleEndian = true;
7051   for (unsigned i = 0; i < Width; i++) {
7052     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
7053     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
7054     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
7055     if (!BigEndian && !LittleEndian)
7056       return None;
7057   }
7058 
  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
7061   return BigEndian;
7062 }
7063 
7064 static SDValue stripTruncAndExt(SDValue Value) {
7065   switch (Value.getOpcode()) {
7066   case ISD::TRUNCATE:
7067   case ISD::ZERO_EXTEND:
7068   case ISD::SIGN_EXTEND:
7069   case ISD::ANY_EXTEND:
7070     return stripTruncAndExt(Value.getOperand(0));
7071   }
7072   return Value;
7073 }
7074 
/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
7078 ///
7079 /// Assuming little endian target:
7080 ///  i8 *p = ...
7081 ///  i32 val = ...
7082 ///  p[0] = (val >> 0) & 0xFF;
7083 ///  p[1] = (val >> 8) & 0xFF;
7084 ///  p[2] = (val >> 16) & 0xFF;
7085 ///  p[3] = (val >> 24) & 0xFF;
7086 /// =>
7087 ///  *((i32)p) = val;
7088 ///
7089 ///  i8 *p = ...
7090 ///  i32 val = ...
7091 ///  p[0] = (val >> 24) & 0xFF;
7092 ///  p[1] = (val >> 16) & 0xFF;
7093 ///  p[2] = (val >> 8) & 0xFF;
7094 ///  p[3] = (val >> 0) & 0xFF;
7095 /// =>
7096 ///  *((i32)p) = BSWAP(val);
7097 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
7098   // The matching looks for "store (trunc x)" patterns that appear early but are
7099   // likely to be replaced by truncating store nodes during combining.
7100   // TODO: If there is evidence that running this later would help, this
7101   //       limitation could be removed. Legality checks may need to be added
7102   //       for the created store and optional bswap/rotate.
7103   if (LegalOperations)
7104     return SDValue();
7105 
7106   // Collect all the stores in the chain.
7107   SDValue Chain;
7108   SmallVector<StoreSDNode *, 8> Stores;
7109   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
7110     // TODO: Allow unordered atomics when wider type is legal (see D66309)
7111     EVT MemVT = Store->getMemoryVT();
7112     if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
7113         !Store->isSimple() || Store->isIndexed())
7114       return SDValue();
7115     Stores.push_back(Store);
7116     Chain = Store->getChain();
7117   }
7118   // There is no reason to continue if we do not have at least a pair of stores.
7119   if (Stores.size() < 2)
7120     return SDValue();
7121 
7122   // Handle simple types only.
7123   LLVMContext &Context = *DAG.getContext();
7124   unsigned NumStores = Stores.size();
7125   unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
7126   unsigned WideNumBits = NumStores * NarrowNumBits;
7127   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
7128   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
7129     return SDValue();
7130 
7131   // Check if all bytes of the source value that we are looking at are stored
7132   // to the same base address. Collect offsets from Base address into OffsetMap.
7133   SDValue SourceValue;
7134   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
7135   int64_t FirstOffset = INT64_MAX;
7136   StoreSDNode *FirstStore = nullptr;
7137   Optional<BaseIndexOffset> Base;
7138   for (auto Store : Stores) {
    // All the stores store different parts of the combined wide value. A
    // truncate is required to get the partial value.
7141     SDValue Trunc = Store->getValue();
7142     if (Trunc.getOpcode() != ISD::TRUNCATE)
7143       return SDValue();
    // Except for the lowest-order part, a shift operation is required to get
    // the offset into the wide source value.
7146     int64_t Offset = 0;
7147     SDValue WideVal = Trunc.getOperand(0);
7148     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
7149         isa<ConstantSDNode>(WideVal.getOperand(1))) {
7150       // The shift amount must be a constant multiple of the narrow type.
7151       // It is translated to the offset address in the wide source value "y".
7152       //
7153       // x = srl y, ShiftAmtC
7154       // i8 z = trunc x
7155       // store z, ...
7156       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
7157       if (ShiftAmtC % NarrowNumBits != 0)
7158         return SDValue();
7159 
7160       Offset = ShiftAmtC / NarrowNumBits;
7161       WideVal = WideVal.getOperand(0);
7162     }
7163 
    // Stores must share the same source value with different offsets.
    // Truncates and extends should be stripped to get the single source value.
7166     if (!SourceValue)
7167       SourceValue = WideVal;
7168     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
7169       return SDValue();
7170     else if (SourceValue.getValueType() != WideVT) {
7171       if (WideVal.getValueType() == WideVT ||
7172           WideVal.getScalarValueSizeInBits() >
7173               SourceValue.getScalarValueSizeInBits())
7174         SourceValue = WideVal;
7175       // Give up if the source value type is smaller than the store size.
7176       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
7177         return SDValue();
7178     }
7179 
7180     // Stores must share the same base address.
7181     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
7182     int64_t ByteOffsetFromBase = 0;
7183     if (!Base)
7184       Base = Ptr;
7185     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7186       return SDValue();
7187 
7188     // Remember the first store.
7189     if (ByteOffsetFromBase < FirstOffset) {
7190       FirstStore = Store;
7191       FirstOffset = ByteOffsetFromBase;
7192     }
7193     // Map the offset in the store and the offset in the combined value, and
7194     // early return if it has been set before.
7195     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
7196       return SDValue();
7197     OffsetMap[Offset] = ByteOffsetFromBase;
7198   }
7199 
7200   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7201   assert(FirstStore && "First store must be set");
7202 
7203   // Check that a store of the wide type is both allowed and fast on the target
7204   const DataLayout &Layout = DAG.getDataLayout();
7205   bool Fast = false;
7206   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
7207                                         *FirstStore->getMemOperand(), &Fast);
7208   if (!Allowed || !Fast)
7209     return SDValue();
7210 
7211   // Check if the pieces of the value are going to the expected places in memory
7212   // to merge the stores.
7213   auto checkOffsets = [&](bool MatchLittleEndian) {
7214     if (MatchLittleEndian) {
7215       for (unsigned i = 0; i != NumStores; ++i)
7216         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
7217           return false;
7218     } else { // MatchBigEndian by reversing loop counter.
7219       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
7220         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
7221           return false;
7222     }
7223     return true;
7224   };
7225 
7226   // Check if the offsets line up for the native data layout of this target.
7227   bool NeedBswap = false;
7228   bool NeedRotate = false;
7229   if (!checkOffsets(Layout.isLittleEndian())) {
7230     // Special-case: check if byte offsets line up for the opposite endian.
7231     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
7232       NeedBswap = true;
7233     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
7234       NeedRotate = true;
7235     else
7236       return SDValue();
7237   }
7238 
7239   SDLoc DL(N);
7240   if (WideVT != SourceValue.getValueType()) {
7241     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
7242            "Unexpected store value to merge");
7243     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
7244   }
7245 
7246   // Before legalize we can introduce illegal bswaps/rotates which will be later
7247   // converted to an explicit bswap sequence. This way we end up with a single
7248   // store and byte shuffling instead of several stores and byte shuffling.
7249   if (NeedBswap) {
7250     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
7251   } else if (NeedRotate) {
7252     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
7253     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
7254     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
7255   }
7256 
7257   SDValue NewStore =
7258       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
7259                    FirstStore->getPointerInfo(), FirstStore->getAlign());
7260 
7261   // Rely on other DAG combine rules to remove the other individual stores.
7262   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
7263   return NewStore;
7264 }
7265 
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
7269 ///
7270 /// Assuming little endian target:
7271 ///  i8 *a = ...
7272 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
7273 /// =>
7274 ///  i32 val = *((i32)a)
7275 ///
7276 ///  i8 *a = ...
7277 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
7278 /// =>
7279 ///  i32 val = BSWAP(*((i32)a))
7280 ///
7281 /// TODO: This rule matches complex patterns with OR node roots and doesn't
7282 /// interact well with the worklist mechanism. When a part of the pattern is
7283 /// updated (e.g. one of the loads) its direct users are put into the worklist,
7284 /// but the root node of the pattern which triggers the load combine is not
7285 /// necessarily a direct user of the changed node. For example, once the address
7286 /// of t28 load is reassociated load combine won't be triggered:
7287 ///             t25: i32 = add t4, Constant:i32<2>
7288 ///           t26: i64 = sign_extend t25
7289 ///        t27: i64 = add t2, t26
7290 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
7291 ///     t29: i32 = zero_extend t28
7292 ///   t32: i32 = shl t29, Constant:i8<8>
7293 /// t33: i32 = or t23, t32
7294 /// As a possible fix visitLoad can check if the load can be a part of a load
7295 /// combine pattern and add corresponding OR roots to the worklist.
7296 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
7297   assert(N->getOpcode() == ISD::OR &&
7298          "Can only match load combining against OR nodes");
7299 
7300   // Handles simple types only
7301   EVT VT = N->getValueType(0);
7302   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
7303     return SDValue();
7304   unsigned ByteWidth = VT.getSizeInBits() / 8;
7305 
7306   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
7307   auto MemoryByteOffset = [&] (ByteProvider P) {
7308     assert(P.isMemory() && "Must be a memory byte provider");
7309     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
7312     unsigned LoadByteWidth = LoadBitWidth / 8;
7313     return IsBigEndianTarget
7314             ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
7315             : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
7316   };
7317 
7318   Optional<BaseIndexOffset> Base;
7319   SDValue Chain;
7320 
7321   SmallPtrSet<LoadSDNode *, 8> Loads;
7322   Optional<ByteProvider> FirstByteProvider;
7323   int64_t FirstOffset = INT64_MAX;
7324 
7325   // Check if all the bytes of the OR we are looking at are loaded from the same
7326   // base address. Collect bytes offsets from Base address in ByteOffsets.
7327   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
7328   unsigned ZeroExtendedBytes = 0;
7329   for (int i = ByteWidth - 1; i >= 0; --i) {
7330     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
7331     if (!P)
7332       return SDValue();
7333 
7334     if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
7337       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
7338         return SDValue();
7339       continue;
7340     }
7341     assert(P->isMemory() && "provenance should either be memory or zero");
7342 
7343     LoadSDNode *L = P->Load;
7344     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
7345            !L->isIndexed() &&
7346            "Must be enforced by calculateByteProvider");
7347     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
7348 
7349     // All loads must share the same chain
7350     SDValue LChain = L->getChain();
7351     if (!Chain)
7352       Chain = LChain;
7353     else if (Chain != LChain)
7354       return SDValue();
7355 
7356     // Loads must share the same base address
7357     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
7358     int64_t ByteOffsetFromBase = 0;
7359     if (!Base)
7360       Base = Ptr;
7361     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
7362       return SDValue();
7363 
7364     // Calculate the offset of the current byte from the base address
7365     ByteOffsetFromBase += MemoryByteOffset(*P);
7366     ByteOffsets[i] = ByteOffsetFromBase;
7367 
7368     // Remember the first byte load
7369     if (ByteOffsetFromBase < FirstOffset) {
7370       FirstByteProvider = P;
7371       FirstOffset = ByteOffsetFromBase;
7372     }
7373 
7374     Loads.insert(L);
7375   }
7376   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
7377          "memory, so there must be at least one load which produces the value");
7378   assert(Base && "Base address of the accessed memory location must be set");
7379   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
7380 
7381   bool NeedsZext = ZeroExtendedBytes > 0;
7382 
7383   EVT MemVT =
7384       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
7385 
7386   if (!MemVT.isSimple())
7387     return SDValue();
7388 
  // Before legalization we can introduce overly wide illegal loads, which will
  // later be split into legal-sized loads. This lets us combine i64-by-i8 load
  // patterns into a couple of i32 loads on 32-bit targets.
7392   if (LegalOperations &&
7393       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
7394                             MemVT))
7395     return SDValue();
7396 
7397   // Check if the bytes of the OR we are looking at match with either big or
7398   // little endian value load
7399   Optional<bool> IsBigEndian = isBigEndian(
7400       makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
7401   if (!IsBigEndian.hasValue())
7402     return SDValue();
7403 
7404   assert(FirstByteProvider && "must be set");
7405 
  // Ensure that the first byte is loaded from zero offset of the first load,
  // so that the combined value can be loaded from the first load's address.
7408   if (MemoryByteOffset(*FirstByteProvider) != 0)
7409     return SDValue();
7410   LoadSDNode *FirstLoad = FirstByteProvider->Load;
7411 
7412   // The node we are looking at matches with the pattern, check if we can
7413   // replace it with a single (possibly zero-extended) load and bswap + shift if
7414   // needed.
7415 
7416   // If the load needs byte swap check if the target supports it
7417   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
7418 
7419   // Before legalize we can introduce illegal bswaps which will be later
7420   // converted to an explicit bswap sequence. This way we end up with a single
7421   // load and byte shuffling instead of several loads and byte shuffling.
7422   // We do not introduce illegal bswaps when zero-extending as this tends to
7423   // introduce too many arithmetic instructions.
7424   if (NeedsBswap && (LegalOperations || NeedsZext) &&
7425       !TLI.isOperationLegal(ISD::BSWAP, VT))
7426     return SDValue();
7427 
7428   // If we need to bswap and zero extend, we have to insert a shift. Check that
7429   // it is legal.
7430   if (NeedsBswap && NeedsZext && LegalOperations &&
7431       !TLI.isOperationLegal(ISD::SHL, VT))
7432     return SDValue();
7433 
7434   // Check that a load of the wide type is both allowed and fast on the target
7435   bool Fast = false;
7436   bool Allowed =
7437       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
7438                              *FirstLoad->getMemOperand(), &Fast);
7439   if (!Allowed || !Fast)
7440     return SDValue();
7441 
7442   SDValue NewLoad =
7443       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
7444                      Chain, FirstLoad->getBasePtr(),
7445                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
7446 
7447   // Transfer chain users from old loads to the new load.
7448   for (LoadSDNode *L : Loads)
7449     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
7450 
7451   if (!NeedsBswap)
7452     return NewLoad;
7453 
7454   SDValue ShiftedLoad =
7455       NeedsZext
7456           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
7457                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
7458                                                    SDLoc(N), LegalOperations))
7459           : NewLoad;
7460   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
7461 }
7462 
// If the target has andn, bsl, or a similar bit-select instruction, we want
// to unfold the masked merge, whose canonical pattern is:
7465 //   |        A  |  |B|
7466 //   ((x ^ y) & m) ^ y
7467 //    |  D  |
7468 // Into:
7469 //   (x & m) | (y & ~m)
7470 // If y is a constant, and the 'andn' does not work with immediates,
7471 // we unfold into a different pattern:
7472 //   ~(~x & m) & (m | y)
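// As a 4-bit illustration, for x = 0b1100, y = 0b0011, m = 0b1010:
//   ((x ^ y) & m) ^ y  == (0b1111 & 0b1010) ^ 0b0011 == 0b1001
//   (x & m) | (y & ~m) ==  0b1000 | 0b0001           == 0b1001
// i.e. bits of x are selected where m is set and bits of y elsewhere.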
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
//       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
7476 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
7477   assert(N->getOpcode() == ISD::XOR);
7478 
7479   // Don't touch 'not' (i.e. where y = -1).
7480   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
7481     return SDValue();
7482 
7483   EVT VT = N->getValueType(0);
7484 
7485   // There are 3 commutable operators in the pattern,
7486   // so we have to deal with 8 possible variants of the basic pattern.
7487   SDValue X, Y, M;
7488   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7489     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7490       return false;
7491     SDValue Xor = And.getOperand(XorIdx);
7492     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7493       return false;
7494     SDValue Xor0 = Xor.getOperand(0);
7495     SDValue Xor1 = Xor.getOperand(1);
7496     // Don't touch 'not' (i.e. where y = -1).
7497     if (isAllOnesOrAllOnesSplat(Xor1))
7498       return false;
7499     if (Other == Xor0)
7500       std::swap(Xor0, Xor1);
7501     if (Other != Xor1)
7502       return false;
7503     X = Xor0;
7504     Y = Xor1;
7505     M = And.getOperand(XorIdx ? 0 : 1);
7506     return true;
7507   };
7508 
7509   SDValue N0 = N->getOperand(0);
7510   SDValue N1 = N->getOperand(1);
7511   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7512       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7513     return SDValue();
7514 
  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
7518   if (isa<ConstantSDNode>(M.getNode()))
7519     return SDValue();
7520 
7521   // We can transform if the target has AndNot
7522   if (!TLI.hasAndNot(M))
7523     return SDValue();
7524 
7525   SDLoc DL(N);
7526 
7527   // If Y is a constant, check that 'andn' works with immediates.
7528   if (!TLI.hasAndNot(Y)) {
7529     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7530     // If not, we need to do a bit more work to make sure andn is still used.
7531     SDValue NotX = DAG.getNOT(DL, X, VT);
7532     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7533     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7534     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7535     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7536   }
7537 
7538   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7539   SDValue NotM = DAG.getNOT(DL, M, VT);
7540   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7541 
7542   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7543 }
7544 
7545 SDValue DAGCombiner::visitXOR(SDNode *N) {
7546   SDValue N0 = N->getOperand(0);
7547   SDValue N1 = N->getOperand(1);
7548   EVT VT = N0.getValueType();
7549 
7550   // fold vector ops
7551   if (VT.isVector()) {
7552     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7553       return FoldedVOp;
7554 
7555     // fold (xor x, 0) -> x, vector edition
7556     if (ISD::isBuildVectorAllZeros(N0.getNode()))
7557       return N1;
7558     if (ISD::isBuildVectorAllZeros(N1.getNode()))
7559       return N0;
7560   }
7561 
7562   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7563   SDLoc DL(N);
7564   if (N0.isUndef() && N1.isUndef())
7565     return DAG.getConstant(0, DL, VT);
7566 
7567   // fold (xor x, undef) -> undef
7568   if (N0.isUndef())
7569     return N0;
7570   if (N1.isUndef())
7571     return N1;
7572 
7573   // fold (xor c1, c2) -> c1^c2
7574   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
7575     return C;
7576 
7577   // canonicalize constant to RHS
7578   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7579      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7580     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7581 
7582   // fold (xor x, 0) -> x
7583   if (isNullConstant(N1))
7584     return N0;
7585 
7586   if (SDValue NewSel = foldBinOpIntoSelect(N))
7587     return NewSel;
7588 
7589   // reassociate xor
7590   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7591     return RXOR;
7592 
7593   // fold !(x cc y) -> (x !cc y)
7594   unsigned N0Opcode = N0.getOpcode();
7595   SDValue LHS, RHS, CC;
7596   if (TLI.isConstTrueVal(N1.getNode()) &&
7597       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
7598     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7599                                                LHS.getValueType());
7600     if (!LegalOperations ||
7601         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7602       switch (N0Opcode) {
7603       default:
7604         llvm_unreachable("Unhandled SetCC Equivalent!");
7605       case ISD::SETCC:
7606         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7607       case ISD::SELECT_CC:
7608         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7609                                N0.getOperand(3), NotCC);
7610       case ISD::STRICT_FSETCC:
7611       case ISD::STRICT_FSETCCS: {
7612         if (N0.hasOneUse()) {
7613           // FIXME Can we handle multiple uses? Could we token factor the chain
7614           // results from the new/old setcc?
7615           SDValue SetCC =
7616               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
7617                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
7618           CombineTo(N, SetCC);
7619           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
7620           recursivelyDeleteUnusedNodes(N0.getNode());
7621           return SDValue(N, 0); // Return N so it doesn't get rechecked!
7622         }
7623         break;
7624       }
7625       }
7626     }
7627   }
7628 
7629   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7630   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7631       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7632     SDValue V = N0.getOperand(0);
7633     SDLoc DL0(N0);
7634     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7635                     DAG.getConstant(1, DL0, V.getValueType()));
7636     AddToWorklist(V.getNode());
7637     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7638   }
7639 
7640   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7641   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7642       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7643     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7644     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7645       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7646       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7647       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7648       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7649       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7650     }
7651   }
7652   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7653   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7654       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7655     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7656     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7657       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7658       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7659       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7660       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7661       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7662     }
7663   }
7664 
7665   // fold (not (neg x)) -> (add X, -1)
7666   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7667   // Y is a constant or the subtract has a single use.
7668   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7669       isNullConstant(N0.getOperand(0))) {
7670     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7671                        DAG.getAllOnesConstant(DL, VT));
7672   }
7673 
7674   // fold (not (add X, -1)) -> (neg X)
7675   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
7676       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
7677     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
7678                        N0.getOperand(0));
7679   }
7680 
7681   // fold (xor (and x, y), y) -> (and (not x), y)
7682   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7683     SDValue X = N0.getOperand(0);
7684     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7685     AddToWorklist(NotX.getNode());
7686     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7687   }
7688 
7689   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7690     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7691     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7692     unsigned BitWidth = VT.getScalarSizeInBits();
7693     if (XorC && ShiftC) {
      // Don't crash on an oversized shift. We cannot guarantee that a bogus
      // shift has been simplified to undef.
7696       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7697       if (ShiftAmt < BitWidth) {
7698         APInt Ones = APInt::getAllOnesValue(BitWidth);
7699         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7700         if (XorC->getAPIntValue() == Ones) {
7701           // If the xor constant is a shifted -1, do a 'not' before the shift:
7702           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7703           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7704           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7705           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7706         }
7707       }
7708     }
7709   }
7710 
7711   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
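  // This is the classic branchless abs idiom: Y is 0 when X >= 0 and -1 when
  // X < 0, so (X + Y) ^ Y is X in the first case and ~(X - 1) == -X in the
  // second (two's complement).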
7712   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7713     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7714     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7715     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7716       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7717       SDValue S0 = S.getOperand(0);
7718       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
7719         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7720           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
7721             return DAG.getNode(ISD::ABS, DL, VT, S0);
7722     }
7723   }
7724 
7725   // fold (xor x, x) -> 0
7726   if (N0 == N1)
7727     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7728 
7729   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7730   // Here is a concrete example of this equivalence:
7731   // i16   x ==  14
7732   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7733   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7734   //
7735   // =>
7736   //
7737   // i16     ~1      == 0b1111111111111110
7738   // i16 rol(~1, 14) == 0b1011111111111111
7739   //
7740   // Some additional tips to help conceptualize this transform:
7741   // - Try to see the operation as placing a single zero in a value of all ones.
7742   // - There exists no value for x which would allow the result to contain zero.
7743   // - Values of x larger than the bitwidth are undefined and do not require a
7744   //   consistent result.
7745   // - Pushing the zero left requires shifting one bits in from the right.
7746   // A rotate left of ~1 is a nice way of achieving the desired result.
7747   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7748       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7749     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7750                        N0.getOperand(1));
7751   }
7752 
7753   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7754   if (N0Opcode == N1.getOpcode())
7755     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7756       return V;
7757 
7758   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7759   if (SDValue MM = unfoldMaskedMerge(N))
7760     return MM;
7761 
7762   // Simplify the expression using non-local knowledge.
7763   if (SimplifyDemandedBits(SDValue(N, 0)))
7764     return SDValue(N, 0);
7765 
7766   if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
7767     return Combined;
7768 
7769   return SDValue();
7770 }
7771 
7772 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7773 /// shift-by-constant operand with identical opcode, we may be able to convert
7774 /// that into 2 independent shifts followed by the logic op. This is a
7775 /// throughput improvement.
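///
/// For example: shl (xor (shl X, 2), Y), 3 --> xor (shl X, 5), (shl Y, 3),
/// assuming the combined shift amount stays below the bit width. X's two
/// shifts fold into a single shift by C0 + C1, and Y picks up only the outer
/// shift amount.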
7776 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7777   // Match a one-use bitwise logic op.
7778   SDValue LogicOp = Shift->getOperand(0);
7779   if (!LogicOp.hasOneUse())
7780     return SDValue();
7781 
7782   unsigned LogicOpcode = LogicOp.getOpcode();
7783   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7784       LogicOpcode != ISD::XOR)
7785     return SDValue();
7786 
7787   // Find a matching one-use shift by constant.
7788   unsigned ShiftOpcode = Shift->getOpcode();
7789   SDValue C1 = Shift->getOperand(1);
7790   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7791   assert(C1Node && "Expected a shift with constant operand");
7792   const APInt &C1Val = C1Node->getAPIntValue();
7793   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7794                              const APInt *&ShiftAmtVal) {
7795     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7796       return false;
7797 
7798     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7799     if (!ShiftCNode)
7800       return false;
7801 
7802     // Capture the shifted operand and shift amount value.
7803     ShiftOp = V.getOperand(0);
7804     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7805 
7806     // Shift amount types do not have to match their operand type, so check that
7807     // the constants are the same width.
7808     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7809       return false;
7810 
7811     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7812     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7813       return false;
7814 
7815     return true;
7816   };
7817 
7818   // Logic ops are commutative, so check each operand for a match.
7819   SDValue X, Y;
7820   const APInt *C0Val;
7821   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7822     Y = LogicOp.getOperand(1);
7823   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7824     Y = LogicOp.getOperand(0);
7825   else
7826     return SDValue();
7827 
7828   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7829   SDLoc DL(Shift);
7830   EVT VT = Shift->getValueType(0);
7831   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7832   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7833   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7834   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7835   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7836 }
7837 
7838 /// Handle transforms common to the three shifts, when the shift amount is a
7839 /// constant.
7840 /// We are looking for: (shift being one of shl/sra/srl)
7841 ///   shift (binop X, C0), C1
7842 /// And want to transform into:
7843 ///   binop (shift X, C1), (shift C0, C1)
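///
/// For example: shl (and X, 0xF0), 4 --> and (shl X, 4), 0xF00, where the new
/// mask 0xF00 is the folded constant (shl 0xF0, 4).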
7844 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7845   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7846 
7847   // Do not turn a 'not' into a regular xor.
7848   if (isBitwiseNot(N->getOperand(0)))
7849     return SDValue();
7850 
7851   // The inner binop must be one-use, since we want to replace it.
7852   SDValue LHS = N->getOperand(0);
7853   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7854     return SDValue();
7855 
7856   // TODO: This is limited to early combining because it may reveal regressions
7857   //       otherwise. But since we just checked a target hook to see if this is
7858   //       desirable, that should have filtered out cases where this interferes
7859   //       with some other pattern matching.
7860   if (!LegalTypes)
7861     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7862       return R;
7863 
7864   // We want to pull some binops through shifts, so that we have (and (shift))
7865   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7866   // thing happens with address calculations, so it's important to canonicalize
7867   // it.
7868   switch (LHS.getOpcode()) {
7869   default:
7870     return SDValue();
7871   case ISD::OR:
7872   case ISD::XOR:
7873   case ISD::AND:
7874     break;
7875   case ISD::ADD:
7876     if (N->getOpcode() != ISD::SHL)
7877       return SDValue(); // only shl(add) not sr[al](add).
7878     break;
7879   }
7880 
7881   // We require the RHS of the binop to be a constant and not opaque as well.
7882   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7883   if (!BinOpCst)
7884     return SDValue();
7885 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases once we figure out when it
  // is exactly profitable.
7889   SDValue BinOpLHSVal = LHS.getOperand(0);
7890   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7891                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7892                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7893                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7894   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7895                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7896 
7897   if (!IsShiftByConstant && !IsCopyOrSelect)
7898     return SDValue();
7899 
7900   if (IsCopyOrSelect && N->hasOneUse())
7901     return SDValue();
7902 
7903   // Fold the constants, shifting the binop RHS by the shift amount.
7904   SDLoc DL(N);
7905   EVT VT = N->getValueType(0);
7906   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7907                                N->getOperand(1));
7908   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7909 
7910   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7911                                  N->getOperand(1));
7912   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7913 }
7914 
7915 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7916   assert(N->getOpcode() == ISD::TRUNCATE);
7917   assert(N->getOperand(0).getOpcode() == ISD::AND);
7918 
7919   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
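  // e.g. (truncate:i16 (and x:i32, 0x00FF00FF)) -> (and (truncate:i16 x),
  // 0x00FF), since truncation keeps only the low bits of the mask.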
7920   EVT TruncVT = N->getValueType(0);
7921   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7922       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7923     SDValue N01 = N->getOperand(0).getOperand(1);
7924     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7925       SDLoc DL(N);
7926       SDValue N00 = N->getOperand(0).getOperand(0);
7927       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7928       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7929       AddToWorklist(Trunc00.getNode());
7930       AddToWorklist(Trunc01.getNode());
7931       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7932     }
7933   }
7934 
7935   return SDValue();
7936 }
7937 
7938 SDValue DAGCombiner::visitRotate(SDNode *N) {
7939   SDLoc dl(N);
7940   SDValue N0 = N->getOperand(0);
7941   SDValue N1 = N->getOperand(1);
7942   EVT VT = N->getValueType(0);
7943   unsigned Bitsize = VT.getScalarSizeInBits();
7944 
7945   // fold (rot x, 0) -> x
7946   if (isNullOrNullSplat(N1))
7947     return N0;
7948 
7949   // fold (rot x, c) -> x iff (c % BitSize) == 0
7950   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7951     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7952     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7953       return N0;
7954   }
7955 
7956   // fold (rot x, c) -> (rot x, c % BitSize)
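       // For example, for i32: (rotl x, 33) -> (rotl x, 1).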
7957   bool OutOfRange = false;
7958   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
7959     OutOfRange |= C->getAPIntValue().uge(Bitsize);
7960     return true;
7961   };
7962   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
7963     EVT AmtVT = N1.getValueType();
7964     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
7965     if (SDValue Amt =
7966             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
7967       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
7968   }
7969 
7970   // rot i16 X, 8 --> bswap X
7971   auto *RotAmtC = isConstOrConstSplat(N1);
7972   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
7973       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
7974     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
7975 
7976   // Simplify the operands using demanded-bits information.
7977   if (SimplifyDemandedBits(SDValue(N, 0)))
7978     return SDValue(N, 0);
7979 
7980   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7981   if (N1.getOpcode() == ISD::TRUNCATE &&
7982       N1.getOperand(0).getOpcode() == ISD::AND) {
7983     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7984       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7985   }
7986 
7987   unsigned NextOp = N0.getOpcode();
7988   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
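       // For example, for i32: (rotl (rotl x, 30), 3) -> (rotl x, 1), and
       // (rotl (rotr x, 3), 5) -> (rotl x, 2).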
7989   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7990     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7991     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7992     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7993       EVT ShiftVT = C1->getValueType(0);
7994       bool SameSide = (N->getOpcode() == NextOp);
7995       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7996       if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
7997               CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) {
7998         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7999         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
8000             ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC});
8001         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
8002                            CombinedShiftNorm);
8003       }
8004     }
8005   }
8006   return SDValue();
8007 }
8008 
8009 SDValue DAGCombiner::visitSHL(SDNode *N) {
8010   SDValue N0 = N->getOperand(0);
8011   SDValue N1 = N->getOperand(1);
8012   if (SDValue V = DAG.simplifyShift(N0, N1))
8013     return V;
8014 
8015   EVT VT = N0.getValueType();
8016   EVT ShiftVT = N1.getValueType();
8017   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8018 
8019   // fold vector ops
8020   if (VT.isVector()) {
8021     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8022       return FoldedVOp;
8023 
8024     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
8025     // If setcc produces an all-ones true value, then:
8026     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
8027     if (N1CV && N1CV->isConstant()) {
8028       if (N0.getOpcode() == ISD::AND) {
8029         SDValue N00 = N0->getOperand(0);
8030         SDValue N01 = N0->getOperand(1);
8031         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
8032 
8033         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
8034             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
8035                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
8036           if (SDValue C =
8037                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
8038             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
8039         }
8040       }
8041     }
8042   }
8043 
8044   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8045 
8046   // fold (shl c1, c2) -> c1<<c2
8047   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
8048     return C;
8049 
8050   if (SDValue NewSel = foldBinOpIntoSelect(N))
8051     return NewSel;
8052 
8053   // if (shl x, c) is known to be zero, return 0
8054   if (DAG.MaskedValueIsZero(SDValue(N, 0),
8055                             APInt::getAllOnesValue(OpSizeInBits)))
8056     return DAG.getConstant(0, SDLoc(N), VT);
8057 
8058   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
8059   if (N1.getOpcode() == ISD::TRUNCATE &&
8060       N1.getOperand(0).getOpcode() == ISD::AND) {
8061     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8062       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
8063   }
8064 
8065   if (SimplifyDemandedBits(SDValue(N, 0)))
8066     return SDValue(N, 0);
8067 
8068   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
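       // For example, for i8: (shl (shl x, 3), 4) -> (shl x, 7), while
       // (shl (shl x, 5), 4) -> 0 because every bit is shifted out.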
8069   if (N0.getOpcode() == ISD::SHL) {
8070     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8071                                           ConstantSDNode *RHS) {
8072       APInt c1 = LHS->getAPIntValue();
8073       APInt c2 = RHS->getAPIntValue();
8074       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8075       return (c1 + c2).uge(OpSizeInBits);
8076     };
8077     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8078       return DAG.getConstant(0, SDLoc(N), VT);
8079 
8080     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8081                                        ConstantSDNode *RHS) {
8082       APInt c1 = LHS->getAPIntValue();
8083       APInt c2 = RHS->getAPIntValue();
8084       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8085       return (c1 + c2).ult(OpSizeInBits);
8086     };
8087     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8088       SDLoc DL(N);
8089       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8090       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
8091     }
8092   }
8093 
8094   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
8095   // For this to be valid, the second form must not preserve any of the bits
8096   // that are shifted out by the inner shift in the first form.  This means
8097   // the outer shift amount must be >= the number of bits added by the ext.
8098   // As a corollary, we don't care what kind of ext it is.
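       // For example, with an i16 inner shift and an i32 result:
       // (shl (zext (shl x, 12)), 16) -> (shl (zext x), 28).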
8099   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
8100        N0.getOpcode() == ISD::ANY_EXTEND ||
8101        N0.getOpcode() == ISD::SIGN_EXTEND) &&
8102       N0.getOperand(0).getOpcode() == ISD::SHL) {
8103     SDValue N0Op0 = N0.getOperand(0);
8104     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8105     EVT InnerVT = N0Op0.getValueType();
8106     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
8107 
8108     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8109                                                          ConstantSDNode *RHS) {
8110       APInt c1 = LHS->getAPIntValue();
8111       APInt c2 = RHS->getAPIntValue();
8112       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8113       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8114              (c1 + c2).uge(OpSizeInBits);
8115     };
8116     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
8117                                   /*AllowUndefs*/ false,
8118                                   /*AllowTypeMismatch*/ true))
8119       return DAG.getConstant(0, SDLoc(N), VT);
8120 
8121     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
8122                                                       ConstantSDNode *RHS) {
8123       APInt c1 = LHS->getAPIntValue();
8124       APInt c2 = RHS->getAPIntValue();
8125       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8126       return c2.uge(OpSizeInBits - InnerBitwidth) &&
8127              (c1 + c2).ult(OpSizeInBits);
8128     };
8129     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
8130                                   /*AllowUndefs*/ false,
8131                                   /*AllowTypeMismatch*/ true)) {
8132       SDLoc DL(N);
8133       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
8134       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
8135       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
8136       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
8137     }
8138   }
8139 
8140   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
8141   // Only fold this if the inner zext has no other uses to avoid increasing
8142   // the total number of instructions.
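       // For example, for i16 -> i32:
       // (shl (zext (srl x, 4)), 4) -> (zext (shl (srl x, 4), 4)).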
8143   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
8144       N0.getOperand(0).getOpcode() == ISD::SRL) {
8145     SDValue N0Op0 = N0.getOperand(0);
8146     SDValue InnerShiftAmt = N0Op0.getOperand(1);
8147 
8148     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8149       APInt c1 = LHS->getAPIntValue();
8150       APInt c2 = RHS->getAPIntValue();
8151       zeroExtendToMatch(c1, c2);
8152       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
8153     };
8154     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
8155                                   /*AllowUndefs*/ false,
8156                                   /*AllowTypeMismatch*/ true)) {
8157       SDLoc DL(N);
8158       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
8159       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
8160       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
8161       AddToWorklist(NewSHL.getNode());
8162       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
8163     }
8164   }
8165 
8166   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
8167   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
8168   // TODO - support non-uniform vector shift amounts.
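       // For example: (shl (srl exact x, 3), 5) -> (shl x, 2), and
       // (shl (srl exact x, 5), 3) -> (srl exact x, 2).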
8169   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
8170       N0->getFlags().hasExact()) {
8171     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8172       uint64_t C1 = N0C1->getZExtValue();
8173       uint64_t C2 = N1C->getZExtValue();
8174       SDLoc DL(N);
8175       if (C1 <= C2)
8176         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8177                            DAG.getConstant(C2 - C1, DL, ShiftVT));
8178       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
8179                          DAG.getConstant(C1 - C2, DL, ShiftVT));
8180     }
8181   }
8182 
8183   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
8184   //                               (and (srl x, (sub c1, c2), MASK)
8185   // Only fold this if the inner shift has no other uses -- if it does, folding
8186   // this will increase the total number of instructions.
8187   // TODO - drop hasOneUse requirement if c1 == c2?
8188   // TODO - support non-uniform vector shift amounts.
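       // For example, for i8: (shl (srl x, 2), 5) -> (and (shl x, 3), 0xE0),
       // and (shl (srl x, 5), 2) -> (and (srl x, 3), 0x1C).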
8189   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
8190       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
8191     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
8192       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
8193         uint64_t c1 = N0C1->getZExtValue();
8194         uint64_t c2 = N1C->getZExtValue();
8195         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
8196         SDValue Shift;
8197         if (c2 > c1) {
8198           Mask <<= c2 - c1;
8199           SDLoc DL(N);
8200           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
8201                               DAG.getConstant(c2 - c1, DL, ShiftVT));
8202         } else {
8203           Mask.lshrInPlace(c1 - c2);
8204           SDLoc DL(N);
8205           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
8206                               DAG.getConstant(c1 - c2, DL, ShiftVT));
8207         }
8208         SDLoc DL(N0);
8209         return DAG.getNode(ISD::AND, DL, VT, Shift,
8210                            DAG.getConstant(Mask, DL, VT));
8211       }
8212     }
8213   }
8214 
8215   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
8216   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
8217       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
8218     SDLoc DL(N);
8219     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
8220     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
8221     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
8222   }
8223 
8224   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
8225   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
8226   // Variant of version done on multiply, except mul by a power of 2 is turned
8227   // into a shift.
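       // For example: (shl (add x, 5), 2) -> (add (shl x, 2), 20).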
8228   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
8229       N0.getNode()->hasOneUse() &&
8230       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8231       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
8232       TLI.isDesirableToCommuteWithShift(N, Level)) {
8233     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
8234     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8235     AddToWorklist(Shl0.getNode());
8236     AddToWorklist(Shl1.getNode());
8237     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
8238   }
8239 
8240   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
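       // For example: (shl (mul x, 3), 2) -> (mul x, 12).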
8241   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
8242       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
8243       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
8244     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
8245     if (isConstantOrConstantVector(Shl))
8246       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
8247   }
8248 
8249   if (N1C && !N1C->isOpaque())
8250     if (SDValue NewSHL = visitShiftByConstant(N))
8251       return NewSHL;
8252 
8253   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
8254   if (N0.getOpcode() == ISD::VSCALE)
8255     if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) {
8256       const APInt &C0 = N0.getConstantOperandAPInt(0);
8257       const APInt &C1 = NC1->getAPIntValue();
8258       return DAG.getVScale(SDLoc(N), VT, C0 << C1);
8259     }
8260 
8261   return SDValue();
8262 }
8263 
8264 // Transform a right shift of a multiply into a multiply-high.
8265 // Examples:
8266 // (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
8267 // (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
8268 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
8269                                   const TargetLowering &TLI) {
8270   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
8271          "SRL or SRA node is required here!");
8272 
8273   // Check the shift amount. Proceed with the transformation if the shift
8274   // amount is constant.
8275   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
8276   if (!ShiftAmtSrc)
8277     return SDValue();
8278 
8279   SDLoc DL(N);
8280 
8281   // The operation feeding into the shift must be a multiply.
8282   SDValue ShiftOperand = N->getOperand(0);
8283   if (ShiftOperand.getOpcode() != ISD::MUL)
8284     return SDValue();
8285 
8286   // Both operands must be equivalent extend nodes.
8287   SDValue LeftOp = ShiftOperand.getOperand(0);
8288   SDValue RightOp = ShiftOperand.getOperand(1);
8289   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
8290   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
8291 
8292   if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
8293     return SDValue();
8294 
8295   EVT WideVT1 = LeftOp.getValueType();
8296   EVT WideVT2 = RightOp.getValueType();
8297   (void)WideVT2;
8298   // Proceed with the transformation if the wide types match.
8299   assert((WideVT1 == WideVT2) &&
8300          "Cannot have a multiply node with two different operand types.");
8301 
8302   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
8303   // Check that the two extend nodes are the same type.
8304   if (NarrowVT != RightOp.getOperand(0).getValueType())
8305     return SDValue();
8306 
8307   // Proceed with the transformation if the wide type is twice as large
8308   // as the narrow type.
8309   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
8310   if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
8311     return SDValue();
8312 
8313   // Check the shift amount with the narrow type size.
8314   // Proceed with the transformation if the shift amount is the width
8315   // of the narrow type.
8316   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
8317   if (ShiftAmt != NarrowVTSize)
8318     return SDValue();
8319 
8320   // If the operation feeding into the MUL is a sign extend (sext),
8321   // we use mulhs. Otherwise, zero extends (zext) use mulhu.
8322   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
8323 
8324   // Combine to mulh if mulh is legal/custom for the narrow type on the target.
8325   if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
8326     return SDValue();
8327 
8328   SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
8329                                RightOp.getOperand(0));
8330   return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
8331                                      : DAG.getZExtOrTrunc(Result, DL, WideVT1));
8332 }
8333 
8334 SDValue DAGCombiner::visitSRA(SDNode *N) {
8335   SDValue N0 = N->getOperand(0);
8336   SDValue N1 = N->getOperand(1);
8337   if (SDValue V = DAG.simplifyShift(N0, N1))
8338     return V;
8339 
8340   EVT VT = N0.getValueType();
8341   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8342 
8343   // Arithmetic shifting an all-sign-bit value is a no-op.
8344   // fold (sra 0, x) -> 0
8345   // fold (sra -1, x) -> -1
8346   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
8347     return N0;
8348 
8349   // fold vector ops
8350   if (VT.isVector())
8351     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8352       return FoldedVOp;
8353 
8354   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8355 
8356   // fold (sra c1, c2) -> c1 >>s c2
8357   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
8358     return C;
8359 
8360   if (SDValue NewSel = foldBinOpIntoSelect(N))
8361     return NewSel;
8362 
8363   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 if the target
8364   // supports sext_inreg.
8365   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
8366     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
8367     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
8368     if (VT.isVector())
8369       ExtVT = EVT::getVectorVT(*DAG.getContext(),
8370                                ExtVT, VT.getVectorNumElements());
8371     if (!LegalOperations ||
8372         TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) ==
8373         TargetLowering::Legal)
8374       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
8375                          N0.getOperand(0), DAG.getValueType(ExtVT));
8376   }
8377 
8378   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
8379   // clamp (add c1, c2) to max shift.
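       // For example, for i8: (sra (sra x, 4), 6) -> (sra x, 7), since the sum
       // 10 is out of range and is clamped to bitwidth - 1.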
8380   if (N0.getOpcode() == ISD::SRA) {
8381     SDLoc DL(N);
8382     EVT ShiftVT = N1.getValueType();
8383     EVT ShiftSVT = ShiftVT.getScalarType();
8384     SmallVector<SDValue, 16> ShiftValues;
8385 
8386     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
8387       APInt c1 = LHS->getAPIntValue();
8388       APInt c2 = RHS->getAPIntValue();
8389       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8390       APInt Sum = c1 + c2;
8391       unsigned ShiftSum =
8392           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
8393       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
8394       return true;
8395     };
8396     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
8397       SDValue ShiftValue;
8398       if (VT.isVector())
8399         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
8400       else
8401         ShiftValue = ShiftValues[0];
8402       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
8403     }
8404   }
8405 
8406   // fold (sra (shl X, m), (sub result_size, n))
8407   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
8408   // result_size - n != m.
8409   // If truncate is free for the target, the sext(trunc) form is likely to
8410   // result in better code than sra(shl).
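       // For example, for i32 with m == 2 and n == 24:
       // (sra (shl X, 2), 8) -> (sign_extend (trunc:i24 (srl X, 6))).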
8411   if (N0.getOpcode() == ISD::SHL && N1C) {
8412     // Get the two constants of the shifts, N01C = m and N1C = n.
8413     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
8414     if (N01C) {
8415       LLVMContext &Ctx = *DAG.getContext();
8416       // Determine what the truncate's result bitsize and type would be.
8417       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
8418 
8419       if (VT.isVector())
8420         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8421 
8422       // Determine the residual right-shift amount.
8423       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
8424 
8425       // If the shift is not a no-op (in which case this should be just a sign
8426       // extend already), the truncate-to type is legal, sign_extend is legal
8427       // on that type, and the truncate to that type is both legal and free,
8428       // perform the transform.
8429       if ((ShiftAmt > 0) &&
8430           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
8431           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
8432           TLI.isTruncateFree(VT, TruncVT)) {
8433         SDLoc DL(N);
8434         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
8435             getShiftAmountTy(N0.getOperand(0).getValueType()));
8436         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
8437                                     N0.getOperand(0), Amt);
8438         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
8439                                     Shift);
8440         return DAG.getNode(ISD::SIGN_EXTEND, DL,
8441                            N->getValueType(0), Trunc);
8442       }
8443     }
8444   }
8445 
8446   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
8447   //   sra (add (shl X, N1C), AddC), N1C -->
8448   //   sext (add (trunc X to (width - N1C)), AddC')
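       //   e.g. for i32 with N1C == 16:
       //   sra (add (shl X, 16), 0x30000), 16 --> sext (add (trunc:i16 X), 3)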
8449   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
8450       N0.getOperand(0).getOpcode() == ISD::SHL &&
8451       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
8452     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
8453       SDValue Shl = N0.getOperand(0);
8454       // Determine what the truncate's type would be and ask the target if that
8455       // is a free operation.
8456       LLVMContext &Ctx = *DAG.getContext();
8457       unsigned ShiftAmt = N1C->getZExtValue();
8458       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
8459       if (VT.isVector())
8460         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
8461 
8462       // TODO: The simple type check probably belongs in the default hook
8463       //       implementation and/or target-specific overrides (because
8464       //       non-simple types likely require masking when legalized), but that
8465       //       restriction may conflict with other transforms.
8466       if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
8467           TLI.isTruncateFree(VT, TruncVT)) {
8468         SDLoc DL(N);
8469         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
8470         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
8471                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
8472         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
8473         return DAG.getSExtOrTrunc(Add, DL, VT);
8474       }
8475     }
8476   }
8477 
8478   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
8479   if (N1.getOpcode() == ISD::TRUNCATE &&
8480       N1.getOperand(0).getOpcode() == ISD::AND) {
8481     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8482       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
8483   }
8484 
8485   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
8486   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
8487   //      if c1 is equal to the number of bits the trunc removes
8488   // TODO - support non-uniform vector shift amounts.
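       // For example:
       // (sra (trunc:i32 (sra x:i64, 32)), 5) -> (trunc (sra x, 37)).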
8489   if (N0.getOpcode() == ISD::TRUNCATE &&
8490       (N0.getOperand(0).getOpcode() == ISD::SRL ||
8491        N0.getOperand(0).getOpcode() == ISD::SRA) &&
8492       N0.getOperand(0).hasOneUse() &&
8493       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
8494     SDValue N0Op0 = N0.getOperand(0);
8495     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
8496       EVT LargeVT = N0Op0.getValueType();
8497       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
8498       if (LargeShift->getAPIntValue() == TruncBits) {
8499         SDLoc DL(N);
8500         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
8501                                       getShiftAmountTy(LargeVT));
8502         SDValue SRA =
8503             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
8504         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
8505       }
8506     }
8507   }
8508 
8509   // Simplify, based on bits shifted out of the LHS.
8510   if (SimplifyDemandedBits(SDValue(N, 0)))
8511     return SDValue(N, 0);
8512 
8513   // If the sign bit is known to be zero, switch this to a SRL.
8514   if (DAG.SignBitIsZero(N0))
8515     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
8516 
8517   if (N1C && !N1C->isOpaque())
8518     if (SDValue NewSRA = visitShiftByConstant(N))
8519       return NewSRA;
8520 
8521   // Try to transform this shift into a multiply-high if
8522   // it matches the appropriate pattern detected in combineShiftToMULH.
8523   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8524     return MULH;
8525 
8526   return SDValue();
8527 }
8528 
8529 SDValue DAGCombiner::visitSRL(SDNode *N) {
8530   SDValue N0 = N->getOperand(0);
8531   SDValue N1 = N->getOperand(1);
8532   if (SDValue V = DAG.simplifyShift(N0, N1))
8533     return V;
8534 
8535   EVT VT = N0.getValueType();
8536   unsigned OpSizeInBits = VT.getScalarSizeInBits();
8537 
8538   // fold vector ops
8539   if (VT.isVector())
8540     if (SDValue FoldedVOp = SimplifyVBinOp(N))
8541       return FoldedVOp;
8542 
8543   ConstantSDNode *N1C = isConstOrConstSplat(N1);
8544 
8545   // fold (srl c1, c2) -> c1 >>u c2
8546   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
8547     return C;
8548 
8549   if (SDValue NewSel = foldBinOpIntoSelect(N))
8550     return NewSel;
8551 
8552   // if (srl x, c) is known to be zero, return 0
8553   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
8554                                    APInt::getAllOnesValue(OpSizeInBits)))
8555     return DAG.getConstant(0, SDLoc(N), VT);
8556 
8557   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
8558   if (N0.getOpcode() == ISD::SRL) {
8559     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
8560                                           ConstantSDNode *RHS) {
8561       APInt c1 = LHS->getAPIntValue();
8562       APInt c2 = RHS->getAPIntValue();
8563       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8564       return (c1 + c2).uge(OpSizeInBits);
8565     };
8566     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
8567       return DAG.getConstant(0, SDLoc(N), VT);
8568 
8569     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
8570                                        ConstantSDNode *RHS) {
8571       APInt c1 = LHS->getAPIntValue();
8572       APInt c2 = RHS->getAPIntValue();
8573       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
8574       return (c1 + c2).ult(OpSizeInBits);
8575     };
8576     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
8577       SDLoc DL(N);
8578       EVT ShiftVT = N1.getValueType();
8579       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
8580       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
8581     }
8582   }
8583 
8584   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
8585       N0.getOperand(0).getOpcode() == ISD::SRL) {
8586     SDValue InnerShift = N0.getOperand(0);
8587     // TODO - support non-uniform vector shift amounts.
8588     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
8589       uint64_t c1 = N001C->getZExtValue();
8590       uint64_t c2 = N1C->getZExtValue();
8591       EVT InnerShiftVT = InnerShift.getValueType();
8592       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
8593       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
8594       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
8595       // This is only valid if c1 + OpSizeInBits == the size of the inner shift.
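           // For example: (srl (trunc:i32 (srl x:i64, 32)), 5)
           //          --> (trunc (srl x, 37)).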
8596       if (c1 + OpSizeInBits == InnerShiftSize) {
8597         SDLoc DL(N);
8598         if (c1 + c2 >= InnerShiftSize)
8599           return DAG.getConstant(0, DL, VT);
8600         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8601         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8602                                        InnerShift.getOperand(0), NewShiftAmt);
8603         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
8604       }
8605       // In the more general case, we can clear the high bits after the shift:
8606       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
8607       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
8608           c1 + c2 < InnerShiftSize) {
8609         SDLoc DL(N);
8610         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
8611         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8612                                        InnerShift.getOperand(0), NewShiftAmt);
8613         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
8614                                                             OpSizeInBits - c2),
8615                                        DL, InnerShiftVT);
8616         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
8617         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
8618       }
8619     }
8620   }
8621 
8622   // fold (srl (shl x, c), c) -> (and x, cst2)
8623   // TODO - (srl (shl x, c1), c2).
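       // For example, for i8: (srl (shl x, 3), 3) -> (and x, 0x1F).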
8624   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8625       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8626     SDLoc DL(N);
8627     SDValue Mask =
8628         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8629     AddToWorklist(Mask.getNode());
8630     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8631   }
8632 
8633   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8634   // TODO - support non-uniform vector shift amounts.
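       // For example, for i16 -> i32:
       // (srl (any_extend x), 4) -> (and (any_extend (srl x, 4)), 0x0FFFFFFF).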
8635   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8636     // Shifting in all undef bits?
8637     EVT SmallVT = N0.getOperand(0).getValueType();
8638     unsigned BitSize = SmallVT.getScalarSizeInBits();
8639     if (N1C->getAPIntValue().uge(BitSize))
8640       return DAG.getUNDEF(VT);
8641 
8642     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8643       uint64_t ShiftAmt = N1C->getZExtValue();
8644       SDLoc DL0(N0);
8645       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8646                                        N0.getOperand(0),
8647                           DAG.getConstant(ShiftAmt, DL0,
8648                                           getShiftAmountTy(SmallVT)));
8649       AddToWorklist(SmallShift.getNode());
8650       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8651       SDLoc DL(N);
8652       return DAG.getNode(ISD::AND, DL, VT,
8653                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8654                          DAG.getConstant(Mask, DL, VT));
8655     }
8656   }
8657 
8658   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8659   // bit, which is unmodified by sra.
8660   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8661     if (N0.getOpcode() == ISD::SRA)
8662       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8663   }
8664 
8665   // fold (srl (ctlz x), log2(bw)) -> x iff x has one bit set (the low bit).
8666   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8667       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8668     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8669 
8670     // If any of the input bits are KnownOne, then the input couldn't be all
8671     // zeros, thus the result of the srl will always be zero.
8672     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8673 
8674     // If all of the bits input to the ctlz node are known to be zero, then
8675     // the result of the ctlz is "32" and the result of the shift is one.
8676     APInt UnknownBits = ~Known.Zero;
8677     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8678 
8679     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8680     if (UnknownBits.isPowerOf2()) {
8681       // Okay, we know that only the single bit specified by UnknownBits could
8682       // be set on input to the CTLZ node. If this bit is set, the SRL will
8683       // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to an
8684       // SRL/XOR pair, which is likely to simplify more.
8685       unsigned ShAmt = UnknownBits.countTrailingZeros();
8686       SDValue Op = N0.getOperand(0);
8687 
8688       if (ShAmt) {
8689         SDLoc DL(N0);
8690         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8691                   DAG.getConstant(ShAmt, DL,
8692                                   getShiftAmountTy(Op.getValueType())));
8693         AddToWorklist(Op.getNode());
8694       }
8695 
8696       SDLoc DL(N);
8697       return DAG.getNode(ISD::XOR, DL, VT,
8698                          Op, DAG.getConstant(1, DL, VT));
8699     }
8700   }
8701 
8702   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8703   if (N1.getOpcode() == ISD::TRUNCATE &&
8704       N1.getOperand(0).getOpcode() == ISD::AND) {
8705     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8706       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8707   }
8708 
8709   // fold operands of srl based on knowledge that the low bits are not
8710   // demanded.
8711   if (SimplifyDemandedBits(SDValue(N, 0)))
8712     return SDValue(N, 0);
8713 
8714   if (N1C && !N1C->isOpaque())
8715     if (SDValue NewSRL = visitShiftByConstant(N))
8716       return NewSRL;
8717 
8718   // Attempt to convert a srl of a load into a narrower zero-extending load.
8719   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8720     return NarrowLoad;
8721 
8722   // Here is a common situation. We want to optimize:
8723   //
8724   //   %a = ...
8725   //   %b = and i32 %a, 2
8726   //   %c = srl i32 %b, 1
8727   //   brcond i32 %c ...
8728   //
8729   // into
8730   //
8731   //   %a = ...
8732   //   %b = and %a, 2
8733   //   %c = setcc eq %b, 0
8734   //   brcond %c ...
8735   //
8736   // However, after the source operand of the SRL is optimized into an AND,
8737   // the SRL itself may not be optimized further. Look for it and add the
8738   // BRCOND into the worklist.
8739   if (N->hasOneUse()) {
8740     SDNode *Use = *N->use_begin();
8741     if (Use->getOpcode() == ISD::BRCOND)
8742       AddToWorklist(Use);
8743     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8744       // Also look past the truncate.
8745       Use = *Use->use_begin();
8746       if (Use->getOpcode() == ISD::BRCOND)
8747         AddToWorklist(Use);
8748     }
8749   }
8750 
8751   // Try to transform this shift into a multiply-high if
8752   // it matches the appropriate pattern detected in combineShiftToMULH.
8753   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
8754     return MULH;
8755 
8756   return SDValue();
8757 }
8758 
8759 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8760   EVT VT = N->getValueType(0);
8761   SDValue N0 = N->getOperand(0);
8762   SDValue N1 = N->getOperand(1);
8763   SDValue N2 = N->getOperand(2);
8764   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8765   unsigned BitWidth = VT.getScalarSizeInBits();
8766 
8767   // fold (fshl N0, N1, 0) -> N0
8768   // fold (fshr N0, N1, 0) -> N1
8769   if (isPowerOf2_32(BitWidth))
8770     if (DAG.MaskedValueIsZero(
8771             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8772       return IsFSHL ? N0 : N1;
8773 
8774   auto IsUndefOrZero = [](SDValue V) {
8775     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8776   };
8777 
8778   // TODO - support non-uniform vector shift amounts.
8779   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8780     EVT ShAmtTy = N2.getValueType();
8781 
8782     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
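         // For example, for i8: (fshl x, y, 11) -> (fshl x, y, 3).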
8783     if (Cst->getAPIntValue().uge(BitWidth)) {
8784       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8785       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8786                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8787     }
8788 
8789     unsigned ShAmt = Cst->getZExtValue();
8790     if (ShAmt == 0)
8791       return IsFSHL ? N0 : N1;
8792 
8793     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8794     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8795     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8796     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
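         // For example, for i8: (fshl 0, y, 3) -> (srl y, 5), and
         // (fshr x, 0, 3) -> (shl x, 5).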
8797     if (IsUndefOrZero(N0))
8798       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8799                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8800                                          SDLoc(N), ShAmtTy));
8801     if (IsUndefOrZero(N1))
8802       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8803                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8804                                          SDLoc(N), ShAmtTy));
8805 
8806     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8807     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
8808     // TODO - bigendian support once we have test coverage.
8809     // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
8810     // TODO - permit LHS EXTLOAD if extensions are shifted out.
8811     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
8812         !DAG.getDataLayout().isBigEndian()) {
8813       auto *LHS = dyn_cast<LoadSDNode>(N0);
8814       auto *RHS = dyn_cast<LoadSDNode>(N1);
8815       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
8816           LHS->getAddressSpace() == RHS->getAddressSpace() &&
8817           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
8818           ISD::isNON_EXTLoad(LHS)) {
8819         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
8820           SDLoc DL(RHS);
8821           uint64_t PtrOff =
8822               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
8823           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
8824           bool Fast = false;
8825           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
8826                                      RHS->getAddressSpace(), NewAlign,
8827                                      RHS->getMemOperand()->getFlags(), &Fast) &&
8828               Fast) {
8829             SDValue NewPtr = DAG.getMemBasePlusOffset(
8830                 RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL);
8831             AddToWorklist(NewPtr.getNode());
8832             SDValue Load = DAG.getLoad(
8833                 VT, DL, RHS->getChain(), NewPtr,
8834                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
8835                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
8836             // Replace the old load's chain with the new load's chain.
8837             WorklistRemover DeadNodes(*this);
8838             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
8839             return Load;
8840           }
8841         }
8842       }
8843     }
8844   }
8845 
8846   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8847   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8848   // iff we know the shift amount is in range.
8849   // TODO: when is it worth doing SUB(BW, N2) as well?
8850   if (isPowerOf2_32(BitWidth)) {
8851     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8852     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8853       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8854     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8855       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8856   }
8857 
8858   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8859   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8860   // TODO: Investigate flipping this rotate if only one is legal. If funnel
8861   // shift is also legal, we might be better off avoiding non-constant (BW - N2).
8862   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8863   if (N0 == N1 && hasOperation(RotOpc, VT))
8864     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8865 
8866   // Simplify, based on bits shifted out of N0/N1.
8867   if (SimplifyDemandedBits(SDValue(N, 0)))
8868     return SDValue(N, 0);
8869 
8870   return SDValue();
8871 }
8872 
8873 SDValue DAGCombiner::visitABS(SDNode *N) {
8874   SDValue N0 = N->getOperand(0);
8875   EVT VT = N->getValueType(0);
8876 
8877   // fold (abs c1) -> c2
8878   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8879     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8880   // fold (abs (abs x)) -> (abs x)
8881   if (N0.getOpcode() == ISD::ABS)
8882     return N0;
8883   // fold (abs x) -> x iff not-negative
8884   if (DAG.SignBitIsZero(N0))
8885     return N0;
8886   return SDValue();
8887 }
8888 
8889 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8890   SDValue N0 = N->getOperand(0);
8891   EVT VT = N->getValueType(0);
8892 
8893   // fold (bswap c1) -> c2
8894   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8895     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8896   // fold (bswap (bswap x)) -> x
8897   if (N0.getOpcode() == ISD::BSWAP)
8898     return N0->getOperand(0);
8899   return SDValue();
8900 }
8901 
8902 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8903   SDValue N0 = N->getOperand(0);
8904   EVT VT = N->getValueType(0);
8905 
8906   // fold (bitreverse c1) -> c2
8907   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8908     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8909   // fold (bitreverse (bitreverse x)) -> x
8910   if (N0.getOpcode() == ISD::BITREVERSE)
8911     return N0.getOperand(0);
8912   return SDValue();
8913 }
8914 
8915 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8916   SDValue N0 = N->getOperand(0);
8917   EVT VT = N->getValueType(0);
8918 
8919   // fold (ctlz c1) -> c2
8920   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8921     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8922 
8923   // If the value is known never to be zero, switch to the undef version.
8924   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8925     if (DAG.isKnownNeverZero(N0))
8926       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8927   }
8928 
8929   return SDValue();
8930 }
8931 
8932 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8933   SDValue N0 = N->getOperand(0);
8934   EVT VT = N->getValueType(0);
8935 
8936   // fold (ctlz_zero_undef c1) -> c2
8937   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8938     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8939   return SDValue();
8940 }
8941 
8942 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8943   SDValue N0 = N->getOperand(0);
8944   EVT VT = N->getValueType(0);
8945 
8946   // fold (cttz c1) -> c2
8947   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8948     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8949 
8950   // If the value is known never to be zero, switch to the undef version.
8951   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8952     if (DAG.isKnownNeverZero(N0))
8953       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8954   }
8955 
8956   return SDValue();
8957 }
8958 
8959 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8960   SDValue N0 = N->getOperand(0);
8961   EVT VT = N->getValueType(0);
8962 
8963   // fold (cttz_zero_undef c1) -> c2
8964   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8965     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8966   return SDValue();
8967 }
8968 
8969 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8970   SDValue N0 = N->getOperand(0);
8971   EVT VT = N->getValueType(0);
8972 
8973   // fold (ctpop c1) -> c2
8974   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8975     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8976   return SDValue();
8977 }
8978 
8979 // FIXME: This should be checking for no signed zeros on individual operands, as
8980 // well as no nans.
8981 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8982                                          SDValue RHS,
8983                                          const TargetLowering &TLI) {
8984   const TargetOptions &Options = DAG.getTarget().Options;
8985   EVT VT = LHS.getValueType();
8986 
8987   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8988          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8989          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8990 }
8991 
8992 /// Generate Min/Max node
8993 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8994                                    SDValue RHS, SDValue True, SDValue False,
8995                                    ISD::CondCode CC, const TargetLowering &TLI,
8996                                    SelectionDAG &DAG) {
8997   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8998     return SDValue();
8999 
9000   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
9001   switch (CC) {
9002   case ISD::SETOLT:
9003   case ISD::SETOLE:
9004   case ISD::SETLT:
9005   case ISD::SETLE:
9006   case ISD::SETULT:
9007   case ISD::SETULE: {
9008     // Since the operands are already known never to be NaN here, either
9009     // fminnum or fminnum_ieee is OK. Try the ieee version first, since
9010     // fminnum is expanded in terms of it.
9011     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
9012     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9013       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9014 
9015     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
9016     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9017       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9018     return SDValue();
9019   }
9020   case ISD::SETOGT:
9021   case ISD::SETOGE:
9022   case ISD::SETGT:
9023   case ISD::SETGE:
9024   case ISD::SETUGT:
9025   case ISD::SETUGE: {
9026     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
9027     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
9028       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
9029 
9030     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
9031     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
9032       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
9033     return SDValue();
9034   }
9035   default:
9036     return SDValue();
9037   }
9038 }
9039 
9040 /// If a (v)select has a condition value that is a sign-bit test, try to smear
9041 /// the condition operand sign-bit across the value width and use it as a mask.
9042 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
9043   SDValue Cond = N->getOperand(0);
9044   SDValue C1 = N->getOperand(1);
9045   SDValue C2 = N->getOperand(2);
9046   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
9047          "Expected select-of-constants");
9048 
9049   EVT VT = N->getValueType(0);
9050   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
9051       VT != Cond.getOperand(0).getValueType())
9052     return SDValue();
9053 
9054   // The inverted-condition + commuted-select variants of these patterns are
9055   // canonicalized to these forms in IR.
9056   SDValue X = Cond.getOperand(0);
9057   SDValue CondC = Cond.getOperand(1);
9058   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
9059   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
9060       isAllOnesOrAllOnesSplat(C2)) {
9061     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
9062     SDLoc DL(N);
9063     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9064     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9065     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
9066   }
9067   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
9068     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
9069     SDLoc DL(N);
9070     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
9071     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
9072     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
9073   }
9074   return SDValue();
9075 }
9076 
9077 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
9078   SDValue Cond = N->getOperand(0);
9079   SDValue N1 = N->getOperand(1);
9080   SDValue N2 = N->getOperand(2);
9081   EVT VT = N->getValueType(0);
9082   EVT CondVT = Cond.getValueType();
9083   SDLoc DL(N);
9084 
9085   if (!VT.isInteger())
9086     return SDValue();
9087 
9088   auto *C1 = dyn_cast<ConstantSDNode>(N1);
9089   auto *C2 = dyn_cast<ConstantSDNode>(N2);
9090   if (!C1 || !C2)
9091     return SDValue();
9092 
9093   // Only do this before legalization to avoid conflicting with target-specific
9094   // transforms in the other direction (which create a select from a zext/sext).
9095   // There is also a target-independent combine in DAGCombiner in the other
9096   // direction for (select Cond, -1, 0) when the condition is not i1.
9097   if (CondVT == MVT::i1 && !LegalOperations) {
9098     if (C1->isNullValue() && C2->isOne()) {
9099       // select Cond, 0, 1 --> zext (!Cond)
9100       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9101       if (VT != MVT::i1)
9102         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
9103       return NotCond;
9104     }
9105     if (C1->isNullValue() && C2->isAllOnesValue()) {
9106       // select Cond, 0, -1 --> sext (!Cond)
9107       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
9108       if (VT != MVT::i1)
9109         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
9110       return NotCond;
9111     }
9112     if (C1->isOne() && C2->isNullValue()) {
9113       // select Cond, 1, 0 --> zext (Cond)
9114       if (VT != MVT::i1)
9115         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9116       return Cond;
9117     }
9118     if (C1->isAllOnesValue() && C2->isNullValue()) {
9119       // select Cond, -1, 0 --> sext (Cond)
9120       if (VT != MVT::i1)
9121         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9122       return Cond;
9123     }
9124 
9125     // Use a target hook because some targets may prefer to transform in the
9126     // other direction.
9127     if (TLI.convertSelectOfConstantsToMath(VT)) {
9128       // For any constants that differ by 1, we can transform the select into an
9129       // extend and add.
9130       const APInt &C1Val = C1->getAPIntValue();
9131       const APInt &C2Val = C2->getAPIntValue();
9132       if (C1Val - 1 == C2Val) {
9133         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
9134         if (VT != MVT::i1)
9135           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9136         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9137       }
9138       if (C1Val + 1 == C2Val) {
9139         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
9140         if (VT != MVT::i1)
9141           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
9142         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
9143       }
9144 
9145       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
9146       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
9147         if (VT != MVT::i1)
9148           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
9149         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
9150         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
9151       }
9152 
9153       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9154         return V;
9155     }
9156 
9157     return SDValue();
9158   }
9159 
9160   // fold (select Cond, 0, 1) -> (xor Cond, 1)
9161   // We can't do this reliably if integer based booleans have different
9162   // contents from floating point based booleans. This is because we can't
9163   // tell whether we have an integer-based boolean or a floating-point-based
9164   // boolean unless we can find the SETCC that produced it and inspect its
9165   // operands. This is fairly easy if Cond is the SETCC node, but it can
9166   // potentially be undiscoverable (or not reasonably discoverable). For
9167   // example, it could be in another basic block or it could require
9168   // searching a complicated expression.
9169   if (CondVT.isInteger() &&
9170       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
9171           TargetLowering::ZeroOrOneBooleanContent &&
9172       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
9173           TargetLowering::ZeroOrOneBooleanContent &&
9174       C1->isNullValue() && C2->isOne()) {
9175     SDValue NotCond =
9176         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
9177     if (VT.bitsEq(CondVT))
9178       return NotCond;
9179     return DAG.getZExtOrTrunc(NotCond, DL, VT);
9180   }
9181 
9182   return SDValue();
9183 }
9184 
9185 SDValue DAGCombiner::visitSELECT(SDNode *N) {
9186   SDValue N0 = N->getOperand(0);
9187   SDValue N1 = N->getOperand(1);
9188   SDValue N2 = N->getOperand(2);
9189   EVT VT = N->getValueType(0);
9190   EVT VT0 = N0.getValueType();
9191   SDLoc DL(N);
9192   SDNodeFlags Flags = N->getFlags();
9193 
9194   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9195     return V;
9196 
9197   // fold (select X, X, Y) -> (or X, Y)
9198   // fold (select X, 1, Y) -> (or X, Y)
9199   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
9200     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
9201 
9202   if (SDValue V = foldSelectOfConstants(N))
9203     return V;
9204 
9205   // fold (select C, 0, X) -> (and (not C), X)
9206   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
9207     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9208     AddToWorklist(NOTNode.getNode());
9209     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
9210   }
9211   // fold (select C, X, 1) -> (or (not C), X)
9212   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
9213     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
9214     AddToWorklist(NOTNode.getNode());
9215     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
9216   }
9217   // fold (select X, Y, X) -> (and X, Y)
9218   // fold (select X, Y, 0) -> (and X, Y)
9219   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
9220     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
9221 
9222   // If we can fold this based on the true/false value, do so.
9223   if (SimplifySelectOps(N, N1, N2))
9224     return SDValue(N, 0); // Don't revisit N.
9225 
9226   if (VT0 == MVT::i1) {
9227     // The code in this block deals with the following 2 equivalences:
9228     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
9229     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always
    // transform to the right-hand (select sequence) form if the inner select
    // already exists in the DAG, and we always transform to the left-hand
    // (combined condition) form if we know that we can further optimize the
    // combination of the conditions.
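    //
    // As a concrete sketch: "select (and i1 %a, %b), %x, %y" has the
    // sequence form "select %a, (select %b, %x, %y), %y", while
    // "select (or i1 %a, %b), %x, %y" has the sequence form
    // "select %a, %x, (select %b, %x, %y)".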
9235     bool normalizeToSequence =
9236         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
9237     // select (and Cond0, Cond1), X, Y
9238     //   -> select Cond0, (select Cond1, X, Y), Y
9239     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
9240       SDValue Cond0 = N0->getOperand(0);
9241       SDValue Cond1 = N0->getOperand(1);
9242       SDValue InnerSelect =
9243           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
9244       if (normalizeToSequence || !InnerSelect.use_empty())
9245         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
9246                            InnerSelect, N2, Flags);
9247       // Cleanup on failure.
9248       if (InnerSelect.use_empty())
9249         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9250     }
9251     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
9252     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
9253       SDValue Cond0 = N0->getOperand(0);
9254       SDValue Cond1 = N0->getOperand(1);
9255       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
9256                                         Cond1, N1, N2, Flags);
9257       if (normalizeToSequence || !InnerSelect.use_empty())
9258         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
9259                            InnerSelect, Flags);
9260       // Cleanup on failure.
9261       if (InnerSelect.use_empty())
9262         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
9263     }
9264 
9265     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
9266     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
9267       SDValue N1_0 = N1->getOperand(0);
9268       SDValue N1_1 = N1->getOperand(1);
9269       SDValue N1_2 = N1->getOperand(2);
9270       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual AND node if we can generate good code for it.
9272         if (!normalizeToSequence) {
9273           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
9274           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
9275                              N2, Flags);
9276         }
9277         // Otherwise see if we can optimize the "and" to a better pattern.
9278         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
9279           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
9280                              N2, Flags);
9281         }
9282       }
9283     }
9284     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
9285     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
9286       SDValue N2_0 = N2->getOperand(0);
9287       SDValue N2_1 = N2->getOperand(1);
9288       SDValue N2_2 = N2->getOperand(2);
9289       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual OR node if we can generate good code for it.
9291         if (!normalizeToSequence) {
9292           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
9293           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
9294                              N2_2, Flags);
9295         }
9296         // Otherwise see if we can optimize to a better pattern.
9297         if (SDValue Combined = visitORLike(N0, N2_0, N))
9298           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
9299                              N2_2, Flags);
9300       }
9301     }
9302   }
9303 
9304   // select (not Cond), N1, N2 -> select Cond, N2, N1
9305   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
9306     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
9307     SelectOp->setFlags(Flags);
9308     return SelectOp;
9309   }
9310 
9311   // Fold selects based on a setcc into other things, such as min/max/abs.
9312   if (N0.getOpcode() == ISD::SETCC) {
9313     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
9314     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9315 
9316     // select (fcmp lt x, y), x, y -> fminnum x, y
9317     // select (fcmp gt x, y), x, y -> fmaxnum x, y
9318     //
9319     // This is OK if we don't care what happens if either operand is a NaN.
9320     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
9321       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
9322                                                 CC, TLI, DAG))
9323         return FMinMax;
9324 
9325     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
9326     // This is conservatively limited to pre-legal-operations to give targets
9327     // a chance to reverse the transform if they want to do that. Also, it is
9328     // unlikely that the pattern would be formed late, so it's probably not
9329     // worth going through the other checks.
9330     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
9331         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
9332         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
9333       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
9334       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
9335       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
9336         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
9337         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
9338         //
9339         // The IR equivalent of this transform would have this form:
9340         //   %a = add %x, C
9341         //   %c = icmp ugt %x, ~C
9342         //   %r = select %c, -1, %a
9343         //   =>
9344         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
9345         //   %u0 = extractvalue %u, 0
9346         //   %u1 = extractvalue %u, 1
9347         //   %r = select %u1, -1, %u0
9348         SDVTList VTs = DAG.getVTList(VT, VT0);
9349         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
9350         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
9351       }
9352     }
9353 
9354     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
9355         (!LegalOperations &&
9356          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
9357       // Any flags available in a select/setcc fold will be on the setcc as they
9358       // migrated from fcmp
9359       Flags = N0.getNode()->getFlags();
9360       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
9361                                        N2, N0.getOperand(2));
9362       SelectNode->setFlags(Flags);
9363       return SelectNode;
9364     }
9365 
9366     return SimplifySelect(DL, N0, N1, N2);
9367   }
9368 
9369   return SDValue();
9370 }
9371 
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a build_vector of ConstantSDNodes (or
// undefs).
9374 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
9375   SDLoc DL(N);
9376   SDValue Cond = N->getOperand(0);
9377   SDValue LHS = N->getOperand(1);
9378   SDValue RHS = N->getOperand(2);
9379   EVT VT = N->getValueType(0);
9380   int NumElems = VT.getVectorNumElements();
9381   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
9382          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
9383          Cond.getOpcode() == ISD::BUILD_VECTOR);
9384 
  // CONCAT_VECTORS can take an arbitrary number of operands. We only care
  // about binary (two-operand) ones here.
9387   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
9388     return SDValue();
9389 
  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF. After we find a
  // non-UNDEF element, keep looping until we get to half the length of the
  // BV, checking that all the non-undef nodes are the same.
9395   ConstantSDNode *BottomHalf = nullptr;
9396   for (int i = 0; i < NumElems / 2; ++i) {
9397     if (Cond->getOperand(i)->isUndef())
9398       continue;
9399 
9400     if (BottomHalf == nullptr)
9401       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9402     else if (Cond->getOperand(i).getNode() != BottomHalf)
9403       return SDValue();
9404   }
9405 
9406   // Do the same for the second half of the BuildVector
9407   ConstantSDNode *TopHalf = nullptr;
9408   for (int i = NumElems / 2; i < NumElems; ++i) {
9409     if (Cond->getOperand(i)->isUndef())
9410       continue;
9411 
9412     if (TopHalf == nullptr)
9413       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
9414     else if (Cond->getOperand(i).getNode() != TopHalf)
9415       return SDValue();
9416   }
9417 
9418   assert(TopHalf && BottomHalf &&
9419          "One half of the selector was all UNDEFs and the other was all the "
9420          "same value. This should have been addressed before this function.");
9421   return DAG.getNode(
9422       ISD::CONCAT_VECTORS, DL, VT,
9423       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
9424       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
9425 }
9426 
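// Recognize a gather/scatter index of the form (add (splat %base), %offsets)
// paired with a null base pointer, and hoist the splat into the base. As a
// sketch:
//   BasePtr = null, Index = add (splat %p), %offs
// -->
//   BasePtr = %p,   Index = %offs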
9427 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
9428   if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD)
9429     return false;
9430 
9431   // For now we check only the LHS of the add.
9432   SDValue LHS = Index.getOperand(0);
9433   SDValue SplatVal = DAG.getSplatValue(LHS);
9434   if (!SplatVal)
9435     return false;
9436 
9437   BasePtr = SplatVal;
9438   Index = Index.getOperand(1);
9439   return true;
9440 }
9441 
9442 // Fold sext/zext of index into index type.
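// For example (a sketch with illustrative types): a gather whose index is
// (zext <N x i32> %idx to <N x i64>) can use %idx directly with an UNSIGNED
// index type, provided the target reports the extend as removable via
// shouldRemoveExtendFromGSIndex().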
9443 bool refineIndexType(MaskedGatherScatterSDNode *MGS, SDValue &Index,
9444                      bool Scaled, SelectionDAG &DAG) {
9445   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9446 
9447   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
9448     SDValue Op = Index.getOperand(0);
9449     MGS->setIndexType(Scaled ? ISD::UNSIGNED_SCALED : ISD::UNSIGNED_UNSCALED);
9450     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9451       Index = Op;
9452       return true;
9453     }
9454   }
9455 
9456   if (Index.getOpcode() == ISD::SIGN_EXTEND) {
9457     SDValue Op = Index.getOperand(0);
9458     MGS->setIndexType(Scaled ? ISD::SIGNED_SCALED : ISD::SIGNED_UNSCALED);
9459     if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType())) {
9460       Index = Op;
9461       return true;
9462     }
9463   }
9464 
9465   return false;
9466 }
9467 
9468 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
9469   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
9470   SDValue Mask = MSC->getMask();
9471   SDValue Chain = MSC->getChain();
9472   SDValue Index = MSC->getIndex();
9473   SDValue Scale = MSC->getScale();
9474   SDValue StoreVal = MSC->getValue();
9475   SDValue BasePtr = MSC->getBasePtr();
9476   SDLoc DL(N);
9477 
9478   // Zap scatters with a zero mask.
9479   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9480     return Chain;
9481 
9482   if (refineUniformBase(BasePtr, Index, DAG)) {
9483     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9484     return DAG.getMaskedScatter(
9485         DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9486         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9487   }
9488 
9489   if (refineIndexType(MSC, Index, MSC->isIndexScaled(), DAG)) {
9490     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
9491     return DAG.getMaskedScatter(
9492         DAG.getVTList(MVT::Other), StoreVal.getValueType(), DL, Ops,
9493         MSC->getMemOperand(), MSC->getIndexType(), MSC->isTruncatingStore());
9494   }
9495 
9496   return SDValue();
9497 }
9498 
9499 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
9500   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
9501   SDValue Mask = MST->getMask();
9502   SDValue Chain = MST->getChain();
9503   SDLoc DL(N);
9504 
9505   // Zap masked stores with a zero mask.
9506   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9507     return Chain;
9508 
  // If this is a masked store with an all-ones mask, we can use an unmasked
  // store.
9510   // FIXME: Can we do this for indexed, compressing, or truncating stores?
9511   if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9512       MST->isUnindexed() && !MST->isCompressingStore() &&
9513       !MST->isTruncatingStore())
9514     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
9515                         MST->getBasePtr(), MST->getMemOperand());
9516 
9517   // Try transforming N to an indexed store.
9518   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9519     return SDValue(N, 0);
9520 
9521   return SDValue();
9522 }
9523 
9524 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
9525   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
9526   SDValue Mask = MGT->getMask();
9527   SDValue Chain = MGT->getChain();
9528   SDValue Index = MGT->getIndex();
9529   SDValue Scale = MGT->getScale();
9530   SDValue PassThru = MGT->getPassThru();
9531   SDValue BasePtr = MGT->getBasePtr();
9532   SDLoc DL(N);
9533 
9534   // Zap gathers with a zero mask.
9535   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9536     return CombineTo(N, PassThru, MGT->getChain());
9537 
9538   if (refineUniformBase(BasePtr, Index, DAG)) {
9539     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9540     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9541                                PassThru.getValueType(), DL, Ops,
9542                                MGT->getMemOperand(), MGT->getIndexType(),
9543                                MGT->getExtensionType());
9544   }
9545 
9546   if (refineIndexType(MGT, Index, MGT->isIndexScaled(), DAG)) {
9547     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
9548     return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
9549                                PassThru.getValueType(), DL, Ops,
9550                                MGT->getMemOperand(), MGT->getIndexType(),
9551                                MGT->getExtensionType());
9552   }
9553 
9554   return SDValue();
9555 }
9556 
9557 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
9558   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
9559   SDValue Mask = MLD->getMask();
9560   SDLoc DL(N);
9561 
9562   // Zap masked loads with a zero mask.
9563   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
9564     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
9565 
  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
9567   // FIXME: Can we do this for indexed, expanding, or extending loads?
9568   if (ISD::isBuildVectorAllOnes(Mask.getNode()) &&
9569       MLD->isUnindexed() && !MLD->isExpandingLoad() &&
9570       MLD->getExtensionType() == ISD::NON_EXTLOAD) {
9571     SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
9572                                 MLD->getBasePtr(), MLD->getMemOperand());
9573     return CombineTo(N, NewLd, NewLd.getValue(1));
9574   }
9575 
9576   // Try transforming N to an indexed load.
9577   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
9578     return SDValue(N, 0);
9579 
9580   return SDValue();
9581 }
9582 
9583 /// A vector select of 2 constant vectors can be simplified to math/logic to
9584 /// avoid a variable select instruction and possibly avoid constant loads.
9585 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
9586   SDValue Cond = N->getOperand(0);
9587   SDValue N1 = N->getOperand(1);
9588   SDValue N2 = N->getOperand(2);
9589   EVT VT = N->getValueType(0);
9590   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
9591       !TLI.convertSelectOfConstantsToMath(VT) ||
9592       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
9593       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
9594     return SDValue();
9595 
9596   // Check if we can use the condition value to increment/decrement a single
9597   // constant value. This simplifies a select to an add and removes a constant
9598   // load/materialization from the general case.
9599   bool AllAddOne = true;
9600   bool AllSubOne = true;
9601   unsigned Elts = VT.getVectorNumElements();
9602   for (unsigned i = 0; i != Elts; ++i) {
9603     SDValue N1Elt = N1.getOperand(i);
9604     SDValue N2Elt = N2.getOperand(i);
9605     if (N1Elt.isUndef() || N2Elt.isUndef())
9606       continue;
9607     if (N1Elt.getValueType() != N2Elt.getValueType())
9608       continue;
9609 
9610     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
9611     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
9612     if (C1 != C2 + 1)
9613       AllAddOne = false;
9614     if (C1 != C2 - 1)
9615       AllSubOne = false;
9616   }
9617 
9618   // Further simplifications for the extra-special cases where the constants are
9619   // all 0 or all -1 should be implemented as folds of these patterns.
9620   SDLoc DL(N);
9621   if (AllAddOne || AllSubOne) {
9622     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
9623     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
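    //
    // E.g. (a sketch):
    //   vselect <4 x i1> %c, <4 x i32> <3,3,3,3>, <4 x i32> <2,2,2,2>
    // -->
    //   add (zext <4 x i1> %c to <4 x i32>), <2,2,2,2>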
9624     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
9625     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
9626     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
9627   }
9628 
9629   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
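  // E.g. (a sketch): vselect %c, <8,8,8,8>, <0,0,0,0>
  //   --> shl (zext %c), <3,3,3,3>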
9630   APInt Pow2C;
9631   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
9632       isNullOrNullSplat(N2)) {
9633     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
9634     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
9635     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
9636   }
9637 
9638   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
9639     return V;
9640 
9641   // The general case for select-of-constants:
9642   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
9643   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
9644   // leave that to a machine-specific pass.
9645   return SDValue();
9646 }
9647 
9648 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
9649   SDValue N0 = N->getOperand(0);
9650   SDValue N1 = N->getOperand(1);
9651   SDValue N2 = N->getOperand(2);
9652   EVT VT = N->getValueType(0);
9653   SDLoc DL(N);
9654 
9655   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
9656     return V;
9657 
9658   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
9659   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
9660     return DAG.getSelect(DL, VT, F, N2, N1);
9661 
9662   // Canonicalize integer abs.
9663   // vselect (setg[te] X,  0),  X, -X ->
9664   // vselect (setgt    X, -1),  X, -X ->
9665   // vselect (setl[te] X,  0), -X,  X ->
9666   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
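  // As a worked sketch for i32 elements: with Y = sra(X, 31), add(X, Y)
  // subtracts 1 from negative values of X, and xor(add(X, Y), Y) then flips
  // the bits back, yielding |X|; for non-negative X, Y is 0 and the value
  // passes through unchanged.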
9667   if (N0.getOpcode() == ISD::SETCC) {
9668     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
9669     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9670     bool isAbs = false;
9671     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
9672 
9673     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
9674          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
9675         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
9676       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
9677     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
9678              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
9679       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
9680 
9681     if (isAbs) {
9682       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
9683         return DAG.getNode(ISD::ABS, DL, VT, LHS);
9684 
9685       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
9686                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
9687                                                   DL, getShiftAmountTy(VT)));
9688       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
9689       AddToWorklist(Shift.getNode());
9690       AddToWorklist(Add.getNode());
9691       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
9692     }
9693 
    // vselect (fcmp lt x, y), x, y -> fminnum x, y
    // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
    //
    // This is OK if we don't care what happens if either operand is a NaN.
9700     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
9701       if (SDValue FMinMax =
9702               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
9703         return FMinMax;
9704     }
9705 
9706     // If this select has a condition (setcc) with narrower operands than the
9707     // select, try to widen the compare to match the select width.
9708     // TODO: This should be extended to handle any constant.
9709     // TODO: This could be extended to handle non-loading patterns, but that
9710     //       requires thorough testing to avoid regressions.
9711     if (isNullOrNullSplat(RHS)) {
9712       EVT NarrowVT = LHS.getValueType();
9713       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
9714       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
9715       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
9716       unsigned WideWidth = WideVT.getScalarSizeInBits();
9717       bool IsSigned = isSignedIntSetCC(CC);
9718       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9719       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
9720           SetCCWidth != 1 && SetCCWidth < WideWidth &&
9721           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
9722           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
9723         // Both compare operands can be widened for free. The LHS can use an
9724         // extended load, and the RHS is a constant:
9725         //   vselect (ext (setcc load(X), C)), N1, N2 -->
9726         //   vselect (setcc extload(X), C'), N1, N2
9727         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9728         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
9729         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
9730         EVT WideSetCCVT = getSetCCResultType(WideVT);
9731         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
9732         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
9733       }
9734     }
9735 
9736     // Match VSELECTs into add with unsigned saturation.
9737     if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits
      // set. If it's on the left side, invert the predicate to simplify the
      // logic below.
9740       SDValue Other;
9741       ISD::CondCode SatCC = CC;
9742       if (ISD::isBuildVectorAllOnes(N1.getNode())) {
9743         Other = N2;
9744         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9745       } else if (ISD::isBuildVectorAllOnes(N2.getNode())) {
9746         Other = N1;
9747       }
9748 
9749       if (Other && Other.getOpcode() == ISD::ADD) {
9750         SDValue CondLHS = LHS, CondRHS = RHS;
9751         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9752 
9753         // Canonicalize condition operands.
9754         if (SatCC == ISD::SETUGE) {
9755           std::swap(CondLHS, CondRHS);
9756           SatCC = ISD::SETULE;
9757         }
9758 
9759         // We can test against either of the addition operands.
9760         // x <= x+y ? x+y : ~0 --> uaddsat x, y
9761         // x+y >= x ? x+y : ~0 --> uaddsat x, y
9762         if (SatCC == ISD::SETULE && Other == CondRHS &&
9763             (OpLHS == CondLHS || OpRHS == CondLHS))
9764           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9765 
9766         if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
9767             CondLHS == OpLHS) {
          // If the RHS is a constant, we have to reverse the constant
          // canonicalization:
          // x >= ~C ? x+C : ~0 --> uaddsat x, C
9771           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
9772             return Cond->getAPIntValue() == ~Op->getAPIntValue();
9773           };
9774           if (SatCC == ISD::SETULE &&
9775               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
9776             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
9777         }
9778       }
9779     }
9780 
9781     // Match VSELECTs into sub with unsigned saturation.
9782     if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side, invert the predicate to simplify the logic below.
9785       SDValue Other;
9786       ISD::CondCode SatCC = CC;
9787       if (ISD::isBuildVectorAllZeros(N1.getNode())) {
9788         Other = N2;
9789         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
9790       } else if (ISD::isBuildVectorAllZeros(N2.getNode())) {
9791         Other = N1;
9792       }
9793 
9794       if (Other && Other.getNumOperands() == 2 && Other.getOperand(0) == LHS) {
9795         SDValue CondRHS = RHS;
9796         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
9797 
9798         // Look for a general sub with unsigned saturation first.
9799         // x >= y ? x-y : 0 --> usubsat x, y
9800         // x >  y ? x-y : 0 --> usubsat x, y
9801         if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
9802             Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
9803           return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9804 
9805         if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
9806           if (isa<BuildVectorSDNode>(CondRHS)) {
            // If the RHS is a constant, we have to reverse the constant
            // canonicalization:
            // x > C-1 ? x+(-C) : 0 --> usubsat x, C
9810             auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
9811               return (!Op && !Cond) ||
9812                      (Op && Cond &&
9813                       Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
9814             };
9815             if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
9816                 ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
9817                                           /*AllowUndefs*/ true)) {
9818               OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
9819                                   OpRHS);
9820               return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9821             }
9822 
9823             // Another special case: If C was a sign bit, the sub has been
9824             // canonicalized into a xor.
9825             // FIXME: Would it be better to use computeKnownBits to determine
9826             //        whether it's safe to decanonicalize the xor?
9827             // x s< 0 ? x^C : 0 --> usubsat x, C
9828             if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
9829               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
9830                   ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
9831                   OpRHSConst->getAPIntValue().isSignMask()) {
9832                 // Note that we have to rebuild the RHS constant here to ensure
9833                 // we don't rely on particular values of undef lanes.
9834                 OpRHS = DAG.getConstant(OpRHSConst->getAPIntValue(), DL, VT);
9835                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
9836               }
9837             }
9838           }
9839         }
9840       }
9841     }
9842   }
9843 
9844   if (SimplifySelectOps(N, N1, N2))
9845     return SDValue(N, 0);  // Don't revisit N.
9846 
9847   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
9848   if (ISD::isBuildVectorAllOnes(N0.getNode()))
9849     return N1;
9850   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
9851   if (ISD::isBuildVectorAllZeros(N0.getNode()))
9852     return N2;
9853 
  // ConvertSelectToConcatVector assumes both of the above checks for
  // (vselect (build_vector all{ones,zeros}) ...) have already been made
  // and addressed.
9857   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
9858       N2.getOpcode() == ISD::CONCAT_VECTORS &&
9859       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
9860     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
9861       return CV;
9862   }
9863 
9864   if (SDValue V = foldVSelectOfConstants(N))
9865     return V;
9866 
9867   return SDValue();
9868 }
9869 
9870 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
9871   SDValue N0 = N->getOperand(0);
9872   SDValue N1 = N->getOperand(1);
9873   SDValue N2 = N->getOperand(2);
9874   SDValue N3 = N->getOperand(3);
9875   SDValue N4 = N->getOperand(4);
9876   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
9877 
9878   // fold select_cc lhs, rhs, x, x, cc -> x
9879   if (N2 == N3)
9880     return N2;
9881 
9882   // Determine if the condition we're dealing with is constant
9883   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
9884                                   CC, SDLoc(N), false)) {
9885     AddToWorklist(SCC.getNode());
9886 
9887     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
9888       if (!SCCC->isNullValue())
9889         return N2;    // cond always true -> true val
9890       else
9891         return N3;    // cond always false -> false val
9892     } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent with DAG creation: no setcc node is created in this case.
9895       return N2;
9896     } else if (SCC.getOpcode() == ISD::SETCC) {
9897       // Fold to a simpler select_cc
9898       SDValue SelectOp = DAG.getNode(
9899           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
9900           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
9901       SelectOp->setFlags(SCC->getFlags());
9902       return SelectOp;
9903     }
9904   }
9905 
9906   // If we can fold this based on the true/false value, do so.
9907   if (SimplifySelectOps(N, N2, N3))
9908     return SDValue(N, 0);  // Don't revisit N.
9909 
9910   // fold select_cc into other things, such as min/max/abs
9911   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
9912 }
9913 
9914 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc for as long as possible.
9918   bool PreferSetCC =
9919       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
9920 
9921   SDValue Combined = SimplifySetCC(
9922       N->getValueType(0), N->getOperand(0), N->getOperand(1),
9923       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
9924 
9925   if (!Combined)
9926     return SDValue();
9927 
  // If we prefer to have a setcc and this combine did not produce one,
  // try our best to recreate one using rebuildSetCC.
9930   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
9931     SDValue NewSetCC = rebuildSetCC(Combined);
9932 
9933     // We don't have anything interesting to combine to.
9934     if (NewSetCC.getNode() == N)
9935       return SDValue();
9936 
9937     if (NewSetCC)
9938       return NewSetCC;
9939   }
9940 
9941   return Combined;
9942 }
9943 
9944 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9945   SDValue LHS = N->getOperand(0);
9946   SDValue RHS = N->getOperand(1);
9947   SDValue Carry = N->getOperand(2);
9948   SDValue Cond = N->getOperand(3);
9949 
9950   // If Carry is false, fold to a regular SETCC.
9951   if (isNullConstant(Carry))
9952     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9953 
9954   return SDValue();
9955 }
9956 
9957 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9958 /// a build_vector of constants.
9959 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9960 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9961 /// Vector extends are not folded if operations are legal; this is to
9962 /// avoid introducing illegal build_vector dag nodes.
9963 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9964                                          SelectionDAG &DAG, bool LegalTypes) {
9965   unsigned Opcode = N->getOpcode();
9966   SDValue N0 = N->getOperand(0);
9967   EVT VT = N->getValueType(0);
9968   SDLoc DL(N);
9969 
9970   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9971          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9972          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9973          && "Expected EXTEND dag node in input!");
9974 
9975   // fold (sext c1) -> c1
9976   // fold (zext c1) -> c1
9977   // fold (aext c1) -> c1
9978   if (isa<ConstantSDNode>(N0))
9979     return DAG.getNode(Opcode, DL, VT, N0);
9980 
9981   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9982   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9983   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9984   if (N0->getOpcode() == ISD::SELECT) {
9985     SDValue Op1 = N0->getOperand(1);
9986     SDValue Op2 = N0->getOperand(2);
9987     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9988         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
9991       //
9992       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9993       // t2: i64 = any_extend t1
9994       // -->
9995       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9996       // -->
9997       // t4: i64 = sign_extend_inreg t3
9998       unsigned FoldOpc = Opcode;
9999       if (FoldOpc == ISD::ANY_EXTEND)
10000         FoldOpc = ISD::SIGN_EXTEND;
10001       return DAG.getSelect(DL, VT, N0->getOperand(0),
10002                            DAG.getNode(FoldOpc, DL, VT, Op1),
10003                            DAG.getNode(FoldOpc, DL, VT, Op2));
10004     }
10005   }
10006 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
10010   EVT SVT = VT.getScalarType();
10011   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
10012       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
10013     return SDValue();
10014 
10015   // We can fold this node into a build_vector.
10016   unsigned VTBits = SVT.getSizeInBits();
10017   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
10018   SmallVector<SDValue, 8> Elts;
10019   unsigned NumElts = VT.getVectorNumElements();
10020 
  // For zero-extensions, UNDEF elements are still guaranteed to have their
  // upper bits set to zero.
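  // E.g. (a sketch): zext of build_vector <i8 undef, i8 255> to v2i16
  // produces build_vector <i16 0, i16 255>, with the undef lane pinned to 0.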
10023   bool IsZext =
10024       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
10025 
10026   for (unsigned i = 0; i != NumElts; ++i) {
10027     SDValue Op = N0.getOperand(i);
10028     if (Op.isUndef()) {
10029       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
10030       continue;
10031     }
10032 
10033     SDLoc DL(Op);
    // Get the constant value and, if needed, truncate it to the size of the
    // type. Nodes like build_vector might have constants wider than the
    // scalar type.
10036     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
10037     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
10038       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
10039     else
10040       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
10041   }
10042 
10043   return DAG.getBuildVector(VT, DL, Elts);
10044 }
10045 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
10050 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
10051                                     unsigned ExtOpc,
10052                                     SmallVectorImpl<SDNode *> &ExtendNodes,
10053                                     const TargetLowering &TLI) {
10054   bool HasCopyToRegUses = false;
10055   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
10056   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
10057                             UE = N0.getNode()->use_end();
10058        UI != UE; ++UI) {
10059     SDNode *User = *UI;
10060     if (User == N)
10061       continue;
10062     if (UI.getUse().getResNo() != N0.getResNo())
10063       continue;
10064     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
10065     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
10066       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
10067       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
10068         // Sign bits will be lost after a zext.
10069         return false;
10070       bool Add = false;
10071       for (unsigned i = 0; i != 2; ++i) {
10072         SDValue UseOp = User->getOperand(i);
10073         if (UseOp == N0)
10074           continue;
10075         if (!isa<ConstantSDNode>(UseOp))
10076           return false;
10077         Add = true;
10078       }
10079       if (Add)
10080         ExtendNodes.push_back(User);
10081       continue;
10082     }
10083     // If truncates aren't free and there are users we can't
10084     // extend, it isn't worthwhile.
10085     if (!isTruncFree)
10086       return false;
10087     // Remember if this value is live-out.
10088     if (User->getOpcode() == ISD::CopyToReg)
10089       HasCopyToRegUses = true;
10090   }
10091 
10092   if (HasCopyToRegUses) {
10093     bool BothLiveOut = false;
10094     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
10095          UI != UE; ++UI) {
10096       SDUse &Use = UI.getUse();
10097       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
10098         BothLiveOut = true;
10099         break;
10100       }
10101     }
10102     if (BothLiveOut)
10103       // Both unextended and extended values are live out. There had better be
10104       // a good reason for the transformation.
      return !ExtendNodes.empty();
10106   }
10107   return true;
10108 }
10109 
10110 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
10111                                   SDValue OrigLoad, SDValue ExtLoad,
10112                                   ISD::NodeType ExtType) {
10113   // Extend SetCC uses if necessary.
10114   SDLoc DL(ExtLoad);
10115   for (SDNode *SetCC : SetCCs) {
10116     SmallVector<SDValue, 4> Ops;
10117 
10118     for (unsigned j = 0; j != 2; ++j) {
10119       SDValue SOp = SetCC->getOperand(j);
10120       if (SOp == OrigLoad)
10121         Ops.push_back(ExtLoad);
10122       else
10123         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
10124     }
10125 
10126     Ops.push_back(SetCC->getOperand(2));
10127     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
10128   }
10129 }
10130 
10131 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
10132 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
10133   SDValue N0 = N->getOperand(0);
10134   EVT DstVT = N->getValueType(0);
10135   EVT SrcVT = N0.getValueType();
10136 
10137   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10138           N->getOpcode() == ISD::ZERO_EXTEND) &&
10139          "Unexpected node type (not an extend)!");
10140 
10141   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
10142   // For example, on a target with legal v4i32, but illegal v8i32, turn:
10143   //   (v8i32 (sext (v8i16 (load x))))
10144   // into:
10145   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
10146   //                          (v4i32 (sextload (x + 16)))))
10147   // Where uses of the original load, i.e.:
10148   //   (v8i16 (load x))
10149   // are replaced with:
10150   //   (v8i16 (truncate
10151   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
10152   //                            (v4i32 (sextload (x + 16)))))))
10153   //
10154   // This combine is only applicable to illegal, but splittable, vectors.
10155   // All legal types, and illegal non-vector types, are handled elsewhere.
10156   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
10157   //
10158   if (N0->getOpcode() != ISD::LOAD)
10159     return SDValue();
10160 
10161   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10162 
10163   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
10164       !N0.hasOneUse() || !LN0->isSimple() ||
10165       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
10166       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10167     return SDValue();
10168 
10169   SmallVector<SDNode *, 4> SetCCs;
10170   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
10171     return SDValue();
10172 
10173   ISD::LoadExtType ExtType =
10174       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
10175 
10176   // Try to split the vector types to get down to legal types.
10177   EVT SplitSrcVT = SrcVT;
10178   EVT SplitDstVT = DstVT;
10179   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
10180          SplitSrcVT.getVectorNumElements() > 1) {
10181     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
10182     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
10183   }
10184 
10185   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
10186     return SDValue();
10187 
10188   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
10189 
10190   SDLoc DL(N);
10191   const unsigned NumSplits =
10192       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
10193   const unsigned Stride = SplitSrcVT.getStoreSize();
10194   SmallVector<SDValue, 4> Loads;
10195   SmallVector<SDValue, 4> Chains;
10196 
10197   SDValue BasePtr = LN0->getBasePtr();
10198   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
10199     const unsigned Offset = Idx * Stride;
10200     const Align Align = commonAlignment(LN0->getAlign(), Offset);
10201 
10202     SDValue SplitLoad = DAG.getExtLoad(
10203         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
10204         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
10205         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10206 
10207     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL);
10208 
10209     Loads.push_back(SplitLoad.getValue(0));
10210     Chains.push_back(SplitLoad.getValue(1));
10211   }
10212 
10213   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
10214   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
10215 
10216   // Simplify TF.
10217   AddToWorklist(NewChain.getNode());
10218 
10219   CombineTo(N, NewValue);
10220 
10221   // Replace uses of the original load (before extension)
10222   // with a truncate of the concatenated sextloaded vectors.
10223   SDValue Trunc =
10224       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
10225   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
10226   CombineTo(N0.getNode(), Trunc, NewChain);
10227   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10228 }
10229 
10230 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10231 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
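// For example (a sketch with illustrative types):
//   (i32 (zext (and (srl (i16 (load x)), 4), 0xff)))
// -->
//   (and (srl (i32 (zextload x)), 4), 0xff)
// with the shift amount and mask constants zero-extended to the wide type.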
10232 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
10233   assert(N->getOpcode() == ISD::ZERO_EXTEND);
10234   EVT VT = N->getValueType(0);
10235   EVT OrigVT = N->getOperand(0).getValueType();
10236   if (TLI.isZExtFree(OrigVT, VT))
10237     return SDValue();
10238 
10239   // and/or/xor
10240   SDValue N0 = N->getOperand(0);
10241   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10242         N0.getOpcode() == ISD::XOR) ||
10243       N0.getOperand(1).getOpcode() != ISD::Constant ||
10244       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
10245     return SDValue();
10246 
10247   // shl/shr
10248   SDValue N1 = N0->getOperand(0);
10249   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
10250       N1.getOperand(1).getOpcode() != ISD::Constant ||
10251       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
10252     return SDValue();
10253 
10254   // load
10255   if (!isa<LoadSDNode>(N1.getOperand(0)))
10256     return SDValue();
10257   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
10258   EVT MemVT = Load->getMemoryVT();
10259   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
10260       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
10261     return SDValue();
10262 
10264   // If the shift op is SHL, the logic op must be AND, otherwise the result
10265   // will be wrong.
10266   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
10267     return SDValue();
10268 
10269   if (!N0.hasOneUse() || !N1.hasOneUse())
10270     return SDValue();
10271 
10272   SmallVector<SDNode*, 4> SetCCs;
10273   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
10274                                ISD::ZERO_EXTEND, SetCCs, TLI))
10275     return SDValue();
10276 
10277   // Actually do the transformation.
10278   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
10279                                    Load->getChain(), Load->getBasePtr(),
10280                                    Load->getMemoryVT(), Load->getMemOperand());
10281 
10282   SDLoc DL1(N1);
10283   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
10284                               N1.getOperand(1));
10285 
10286   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10287   SDLoc DL0(N0);
10288   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
10289                             DAG.getConstant(Mask, DL0, VT));
10290 
10291   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10292   CombineTo(N, And);
10293   if (SDValue(Load, 0).hasOneUse()) {
10294     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
10295   } else {
10296     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
10297                                 Load->getValueType(0), ExtLoad);
10298     CombineTo(Load, Trunc, ExtLoad.getValue(1));
10299   }
10300 
10301   // N0 is dead at this point.
10302   recursivelyDeleteUnusedNodes(N0.getNode());
10303 
10304   return SDValue(N,0); // Return N so it doesn't get rechecked!
10305 }
10306 
10307 /// If we're narrowing or widening the result of a vector select and the final
10308 /// size is the same size as a setcc (compare) feeding the select, then try to
10309 /// apply the cast operation to the select's operands because matching vector
10310 /// sizes for a select condition and other operands should be more efficient.
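/// For example (a sketch): truncating "vselect (setcc v4i32), v4i64 A,
/// v4i64 B" to v4i32 becomes "vselect (setcc v4i32), (trunc A), (trunc B)",
/// so the condition and the selected operands have matching vector sizes.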
10311 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
10312   unsigned CastOpcode = Cast->getOpcode();
10313   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
10314           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
10315           CastOpcode == ISD::FP_ROUND) &&
10316          "Unexpected opcode for vector select narrowing/widening");
10317 
10318   // We only do this transform before legal ops because the pattern may be
10319   // obfuscated by target-specific operations after legalization. Do not create
10320   // an illegal select op, however, because that may be difficult to lower.
10321   EVT VT = Cast->getValueType(0);
10322   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
10323     return SDValue();
10324 
10325   SDValue VSel = Cast->getOperand(0);
10326   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
10327       VSel.getOperand(0).getOpcode() != ISD::SETCC)
10328     return SDValue();
10329 
10330   // Does the setcc have the same vector size as the casted select?
10331   SDValue SetCC = VSel.getOperand(0);
10332   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
10333   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
10334     return SDValue();
10335 
10336   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
10337   SDValue A = VSel.getOperand(1);
10338   SDValue B = VSel.getOperand(2);
10339   SDValue CastA, CastB;
10340   SDLoc DL(Cast);
10341   if (CastOpcode == ISD::FP_ROUND) {
10342     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
10343     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
10344     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
10345   } else {
10346     CastA = DAG.getNode(CastOpcode, DL, VT, A);
10347     CastB = DAG.getNode(CastOpcode, DL, VT, B);
10348   }
10349   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
10350 }
10351 
10352 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10353 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10354 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
10355                                      const TargetLowering &TLI, EVT VT,
10356                                      bool LegalOperations, SDNode *N,
10357                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
10358   SDNode *N0Node = N0.getNode();
10359   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
10360                                                    : ISD::isZEXTLoad(N0Node);
10361   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
10362       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
10363     return SDValue();
10364 
10365   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10366   EVT MemVT = LN0->getMemoryVT();
10367   if ((LegalOperations || !LN0->isSimple() ||
10368        VT.isVector()) &&
10369       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
10370     return SDValue();
10371 
10372   SDValue ExtLoad =
10373       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10374                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
10375   Combiner.CombineTo(N, ExtLoad);
10376   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10377   if (LN0->use_empty())
10378     Combiner.recursivelyDeleteUnusedNodes(LN0);
10379   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10380 }
10381 
10382 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
10383 // Only generate vector extloads when 1) they're legal, and 2) they are
10384 // deemed desirable by the target.
10385 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
10386                                   const TargetLowering &TLI, EVT VT,
10387                                   bool LegalOperations, SDNode *N, SDValue N0,
10388                                   ISD::LoadExtType ExtLoadType,
10389                                   ISD::NodeType ExtOpc) {
10390   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
10391       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
10392       ((LegalOperations || VT.isVector() ||
10393         !cast<LoadSDNode>(N0)->isSimple()) &&
10394        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
10395     return {};
10396 
10397   bool DoXform = true;
10398   SmallVector<SDNode *, 4> SetCCs;
10399   if (!N0.hasOneUse())
10400     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
10401   if (VT.isVector())
10402     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
10403   if (!DoXform)
10404     return {};
10405 
10406   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10407   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
10408                                    LN0->getBasePtr(), N0.getValueType(),
10409                                    LN0->getMemOperand());
10410   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
10411   // If the load value is used only by N, replace it via CombineTo N.
10412   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
10413   Combiner.CombineTo(N, ExtLoad);
10414   if (NoReplaceTrunc) {
10415     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10416     Combiner.recursivelyDeleteUnusedNodes(LN0);
10417   } else {
10418     SDValue Trunc =
10419         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
10420     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10421   }
10422   return SDValue(N, 0); // Return N so it doesn't get rechecked!
10423 }
10424 
10425 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
10426                                         const TargetLowering &TLI, EVT VT,
10427                                         SDNode *N, SDValue N0,
10428                                         ISD::LoadExtType ExtLoadType,
10429                                         ISD::NodeType ExtOpc) {
10430   if (!N0.hasOneUse())
10431     return SDValue();
10432 
10433   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
10434   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
10435     return SDValue();
10436 
10437   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
10438     return SDValue();
10439 
10440   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
10441     return SDValue();
10442 
10443   SDLoc dl(Ld);
10444   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
10445   SDValue NewLoad = DAG.getMaskedLoad(
10446       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
10447       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
10448       ExtLoadType, Ld->isExpandingLoad());
10449   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
10450   return NewLoad;
10451 }
10452 
10453 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
10454                                        bool LegalOperations) {
10455   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
10456           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
10457 
10458   SDValue SetCC = N->getOperand(0);
10459   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
10460       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
10461     return SDValue();
10462 
10463   SDValue X = SetCC.getOperand(0);
10464   SDValue Ones = SetCC.getOperand(1);
10465   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
10466   EVT VT = N->getValueType(0);
10467   EVT XVT = X.getValueType();
10468   // setge X, C is canonicalized to setgt, so we do not need to match that
10469   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
10470   // not require the 'not' op.
10471   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
10472     // Invert and smear/shift the sign bit:
10473     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
10474     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
10475     SDLoc DL(N);
10476     unsigned ShCt = VT.getSizeInBits() - 1;
10477     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10478     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
10479       SDValue NotX = DAG.getNOT(DL, X, VT);
10480       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
10481       auto ShiftOpcode =
10482         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
10483       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
10484     }
10485   }
10486   return SDValue();
10487 }
10488 
10489 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
10490   SDValue N0 = N->getOperand(0);
10491   EVT VT = N->getValueType(0);
10492   SDLoc DL(N);
10493 
10494   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10495     return Res;
10496 
10497   // fold (sext (sext x)) -> (sext x)
10498   // fold (sext (aext x)) -> (sext x)
10499   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10500     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
10501 
10502   if (N0.getOpcode() == ISD::TRUNCATE) {
10503     // fold (sext (truncate (load x))) -> (sext (smaller load x))
10504     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
10505     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10506       SDNode *oye = N0.getOperand(0).getNode();
10507       if (NarrowLoad.getNode() != N0.getNode()) {
10508         CombineTo(N0.getNode(), NarrowLoad);
10509         // CombineTo deleted the truncate, if needed, but not what's under it.
10510         AddToWorklist(oye);
10511       }
10512       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10513     }
10514 
10515     // See if the value being truncated is already sign extended.  If so, just
10516     // eliminate the trunc/sext pair.
10517     SDValue Op = N0.getOperand(0);
10518     unsigned OpBits   = Op.getScalarValueSizeInBits();
10519     unsigned MidBits  = N0.getScalarValueSizeInBits();
10520     unsigned DestBits = VT.getScalarSizeInBits();
10521     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
10522 
10523     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, the trunc/sext pair is a no-op, so Op can be used directly.
10526       if (NumSignBits > DestBits-MidBits)
10527         return Op;
10528     } else if (OpBits < DestBits) {
10529       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
10530       // bits, just sext from i32.
10531       if (NumSignBits > OpBits-MidBits)
10532         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
10533     } else {
10534       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
10535       // bits, just truncate to i32.
10536       if (NumSignBits > OpBits-MidBits)
10537         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
10538     }
10539 
10540     // fold (sext (truncate x)) -> (sextinreg x).
10541     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
10542                                                  N0.getValueType())) {
10543       if (OpBits < DestBits)
10544         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
10545       else if (OpBits > DestBits)
10546         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
10547       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
10548                          DAG.getValueType(N0.getValueType()));
10549     }
10550   }
10551 
10552   // Try to simplify (sext (load x)).
10553   if (SDValue foldedExt =
10554           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10555                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
10556     return foldedExt;
10557 
10558   if (SDValue foldedExt =
10559       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
10560                                ISD::SIGN_EXTEND))
10561     return foldedExt;
10562 
10563   // fold (sext (load x)) to multiple smaller sextloads.
10564   // Only on illegal but splittable vectors.
10565   if (SDValue ExtLoad = CombineExtLoad(N))
10566     return ExtLoad;
10567 
10568   // Try to simplify (sext (sextload x)).
10569   if (SDValue foldedExt = tryToFoldExtOfExtload(
10570           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
10571     return foldedExt;
10572 
10573   // fold (sext (and/or/xor (load x), cst)) ->
10574   //      (and/or/xor (sextload x), (sext cst))
10575   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10576        N0.getOpcode() == ISD::XOR) &&
10577       isa<LoadSDNode>(N0.getOperand(0)) &&
10578       N0.getOperand(1).getOpcode() == ISD::Constant &&
10579       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10580     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10581     EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
10584       SmallVector<SDNode*, 4> SetCCs;
10585       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10586                                              ISD::SIGN_EXTEND, SetCCs, TLI);
10587       if (DoXform) {
10588         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
10589                                          LN00->getChain(), LN00->getBasePtr(),
10590                                          LN00->getMemoryVT(),
10591                                          LN00->getMemOperand());
10592         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
10593         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10594                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
10595         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
10596         bool NoReplaceTruncAnd = !N0.hasOneUse();
10597         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
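        // Capture the use counts now; the CombineTo calls below rewrite uses
        // and would change these answers.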
10598         CombineTo(N, And);
10599         // If N0 has multiple uses, change other uses as well.
10600         if (NoReplaceTruncAnd) {
10601           SDValue TruncAnd =
10602               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10603           CombineTo(N0.getNode(), TruncAnd);
10604         }
10605         if (NoReplaceTrunc) {
10606           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10607         } else {
10608           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10609                                       LN00->getValueType(0), ExtLoad);
10610           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10611         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
10613       }
10614     }
10615   }
10616 
10617   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10618     return V;
10619 
10620   if (N0.getOpcode() == ISD::SETCC) {
10621     SDValue N00 = N0.getOperand(0);
10622     SDValue N01 = N0.getOperand(1);
10623     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
10624     EVT N00VT = N00.getValueType();
10625 
10626     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
10627     // Only do this before legalize for now.
10628     if (VT.isVector() && !LegalOperations &&
10629         TLI.getBooleanContents(N00VT) ==
10630             TargetLowering::ZeroOrNegativeOneBooleanContent) {
10631       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
10632       // of the same size as the compared operands. Only optimize sext(setcc())
10633       // if this is the case.
10634       EVT SVT = getSetCCResultType(N00VT);
10635 
10636       // If we already have the desired type, don't change it.
10637       if (SVT != N0.getValueType()) {
10638         // We know that the # elements of the results is the same as the
10639         // # elements of the compare (and the # elements of the compare result
10640         // for that matter).  Check to see that they are the same size.  If so,
10641         // we know that the element size of the sext'd result matches the
10642         // element size of the compare operands.
10643         if (VT.getSizeInBits() == SVT.getSizeInBits())
10644           return DAG.getSetCC(DL, VT, N00, N01, CC);
10645 
10646         // If the desired elements are smaller or larger than the source
10647         // elements, we can use a matching integer vector type and then
10648         // truncate/sign extend.
10649         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
10650         if (SVT == MatchingVecType) {
10651           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
10652           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
10653         }
10654       }
10655     }
10656 
10657     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
10658     // Here, T can be 1 or -1, depending on the type of the setcc and
10659     // getBooleanContents().
10660     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
10661 
10662     // To determine the "true" side of the select, we need to know the high bit
10663     // of the value returned by the setcc if it evaluates to true.
10664     // If the type of the setcc is i1, then the true case of the select is just
10665     // sext(i1 1), that is, -1.
10666     // If the type of the setcc is larger (say, i8) then the value of the high
10667     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
10668     // of the appropriate width.
10669     SDValue ExtTrueVal = (SetCCWidth == 1)
10670                              ? DAG.getAllOnesConstant(DL, VT)
10671                              : DAG.getBoolConstant(true, DL, VT, N00VT);
10672     SDValue Zero = DAG.getConstant(0, DL, VT);
10673     if (SDValue SCC =
10674             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
10675       return SCC;
10676 
10677     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
10678       EVT SetCCVT = getSetCCResultType(N00VT);
10679       // Don't do this transform for i1 because there's a select transform
10680       // that would reverse it.
10681       // TODO: We should not do this transform at all without a target hook
10682       // because a sext is likely cheaper than a select?
10683       if (SetCCVT.getScalarSizeInBits() != 1 &&
10684           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
10685         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
10686         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
10687       }
10688     }
10689   }
10690 
10691   // fold (sext x) -> (zext x) if the sign bit is known zero.
10692   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
10693       DAG.SignBitIsZero(N0))
10694     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
10695 
10696   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10697     return NewVSel;
10698 
10699   // Eliminate this sign extend by doing a negation in the destination type:
10700   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
10701   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
10702       isNullOrNullSplat(N0.getOperand(0)) &&
10703       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
10704       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
10705     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
10706     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
10707   }
10708   // Eliminate this sign extend by doing a decrement in the destination type:
10709   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
10710   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
10711       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
10712       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10713       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
10714     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
10715     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10716   }
10717 
10718   // fold sext (not i1 X) -> add (zext i1 X), -1
10719   // TODO: This could be extended to handle bool vectors.
10720   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
10721       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
10722                             TLI.isOperationLegal(ISD::ADD, VT)))) {
10723     // If we can eliminate the 'not', the sext form should be better
10724     if (SDValue NewXor = visitXOR(N0.getNode())) {
10725       // Returning N0 is a form of in-visit replacement that may have
10726       // invalidated N0.
10727       if (NewXor.getNode() == N0.getNode()) {
10728         // Return SDValue here as the xor should have already been replaced in
10729         // this sext.
10730         return SDValue();
10731       } else {
10732         // Return a new sext with the new xor.
10733         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
10734       }
10735     }
10736 
10737     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
10738     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
10739   }
10740 
10741   return SDValue();
10742 }
10743 
10744 // isTruncateOf - If N is a truncate of some other value, return true, record
10745 // the value being truncated in Op and which of Op's bits are zero/one in Known.
10746 // This function computes KnownBits to avoid a duplicated call to
10747 // computeKnownBits in the caller.
10748 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
10749                          KnownBits &Known) {
10750   if (N->getOpcode() == ISD::TRUNCATE) {
10751     Op = N->getOperand(0);
10752     Known = DAG.computeKnownBits(Op);
10753     return true;
10754   }
10755 
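  // A boolean setcc of the form (setne X, 0), where at most the low bit of X
  // can be set, behaves exactly like a truncate of X to i1.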
10756   if (N.getOpcode() != ISD::SETCC ||
10757       N.getValueType().getScalarType() != MVT::i1 ||
10758       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
10759     return false;
10760 
10761   SDValue Op0 = N->getOperand(0);
10762   SDValue Op1 = N->getOperand(1);
10763   assert(Op0.getValueType() == Op1.getValueType());
10764 
10765   if (isNullOrNullSplat(Op0))
10766     Op = Op1;
10767   else if (isNullOrNullSplat(Op1))
10768     Op = Op0;
10769   else
10770     return false;
10771 
10772   Known = DAG.computeKnownBits(Op);
10773 
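  // Only bit 0 of Op may be nonzero for the setcc to act as a truncate.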
10774   return (Known.Zero | 1).isAllOnesValue();
10775 }
10776 
10777 /// Given an extending node with a pop-count operand, if the target does not
10778 /// support a pop-count in the narrow source type but does support it in the
10779 /// destination type, widen the pop-count to the destination type.
10780 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
10781   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
10782           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
10783 
10784   SDValue CtPop = Extend->getOperand(0);
10785   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
10786     return SDValue();
10787 
10788   EVT VT = Extend->getValueType(0);
10789   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10790   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
10791       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
10792     return SDValue();
10793 
10794   // zext (ctpop X) --> ctpop (zext X)
10795   SDLoc DL(Extend);
10796   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
10797   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
10798 }
10799 
10800 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
10801   SDValue N0 = N->getOperand(0);
10802   EVT VT = N->getValueType(0);
10803 
10804   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10805     return Res;
10806 
10807   // fold (zext (zext x)) -> (zext x)
10808   // fold (zext (aext x)) -> (zext x)
10809   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
10810     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
10811                        N0.getOperand(0));
10812 
10813   // fold (zext (truncate x)) -> (zext x) or
10814   //      (zext (truncate x)) -> (truncate x)
10815   // This is valid when the truncated bits of x are already zero.
10816   SDValue Op;
10817   KnownBits Known;
10818   if (isTruncateOf(DAG, N0, Op, Known)) {
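    // Compute which bits the truncate drops that would still be visible in the
    // final type: those from N0's width up to min(Op width, VT width). The
    // fold is safe only if they are all known zero.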
10819     APInt TruncatedBits =
10820       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
10821       APInt(Op.getScalarValueSizeInBits(), 0) :
10822       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
10823                         N0.getScalarValueSizeInBits(),
10824                         std::min(Op.getScalarValueSizeInBits(),
10825                                  VT.getScalarSizeInBits()));
10826     if (TruncatedBits.isSubsetOf(Known.Zero))
10827       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10828   }
10829 
10830   // fold (zext (truncate x)) -> (and x, mask)
10831   if (N0.getOpcode() == ISD::TRUNCATE) {
10832     // fold (zext (truncate (load x))) -> (zext (smaller load x))
10833     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
10834     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10835       SDNode *oye = N0.getOperand(0).getNode();
10836       if (NarrowLoad.getNode() != N0.getNode()) {
10837         CombineTo(N0.getNode(), NarrowLoad);
10838         // CombineTo deleted the truncate, if needed, but not what's under it.
10839         AddToWorklist(oye);
10840       }
10841       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10842     }
10843 
10844     EVT SrcVT = N0.getOperand(0).getValueType();
10845     EVT MinVT = N0.getValueType();
10846 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
10849     if (SrcVT.bitsLT(VT) && VT.isVector()) {
10850       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
10851                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
10852         SDValue Op = N0.getOperand(0);
10853         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10854         AddToWorklist(Op.getNode());
10855         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
10856         // Transfer the debug info; the new node is equivalent to N0.
10857         DAG.transferDbgValues(N0, ZExtOrTrunc);
10858         return ZExtOrTrunc;
10859       }
10860     }
10861 
10862     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
10863       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10864       AddToWorklist(Op.getNode());
10865       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
10866       // We may safely transfer the debug info describing the truncate node over
10867       // to the equivalent and operation.
10868       DAG.transferDbgValues(N0, And);
10869       return And;
10870     }
10871   }
10872 
10873   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
10874   // if either of the casts is not free.
10875   if (N0.getOpcode() == ISD::AND &&
10876       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10877       N0.getOperand(1).getOpcode() == ISD::Constant &&
10878       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10879                            N0.getValueType()) ||
10880        !TLI.isZExtFree(N0.getValueType(), VT))) {
10881     SDValue X = N0.getOperand(0).getOperand(0);
10882     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
10883     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10884     SDLoc DL(N);
10885     return DAG.getNode(ISD::AND, DL, VT,
10886                        X, DAG.getConstant(Mask, DL, VT));
10887   }
10888 
10889   // Try to simplify (zext (load x)).
10890   if (SDValue foldedExt =
10891           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
10892                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
10893     return foldedExt;
10894 
10895   if (SDValue foldedExt =
10896       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
10897                                ISD::ZERO_EXTEND))
10898     return foldedExt;
10899 
10900   // fold (zext (load x)) to multiple smaller zextloads.
10901   // Only on illegal but splittable vectors.
10902   if (SDValue ExtLoad = CombineExtLoad(N))
10903     return ExtLoad;
10904 
10905   // fold (zext (and/or/xor (load x), cst)) ->
10906   //      (and/or/xor (zextload x), (zext cst))
10907   // Unless (and (load x) cst) will match as a zextload already and has
10908   // additional users.
10909   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
10910        N0.getOpcode() == ISD::XOR) &&
10911       isa<LoadSDNode>(N0.getOperand(0)) &&
10912       N0.getOperand(1).getOpcode() == ISD::Constant &&
10913       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
10914     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
10915     EVT MemVT = LN00->getMemoryVT();
10916     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
10917         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
10918       bool DoXform = true;
10919       SmallVector<SDNode*, 4> SetCCs;
10920       if (!N0.hasOneUse()) {
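        // If the and/load pair already matches as a zextload, leave it alone
        // so the other users of N0 keep that form.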
10921         if (N0.getOpcode() == ISD::AND) {
10922           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
10923           EVT LoadResultTy = AndC->getValueType(0);
10924           EVT ExtVT;
10925           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
10926             DoXform = false;
10927         }
10928       }
10929       if (DoXform)
10930         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
10931                                           ISD::ZERO_EXTEND, SetCCs, TLI);
10932       if (DoXform) {
10933         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
10934                                          LN00->getChain(), LN00->getBasePtr(),
10935                                          LN00->getMemoryVT(),
10936                                          LN00->getMemOperand());
10937         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
10938         SDLoc DL(N);
10939         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
10940                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
10941         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
10942         bool NoReplaceTruncAnd = !N0.hasOneUse();
10943         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
10944         CombineTo(N, And);
10945         // If N0 has multiple uses, change other uses as well.
10946         if (NoReplaceTruncAnd) {
10947           SDValue TruncAnd =
10948               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
10949           CombineTo(N0.getNode(), TruncAnd);
10950         }
10951         if (NoReplaceTrunc) {
10952           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
10953         } else {
10954           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
10955                                       LN00->getValueType(0), ExtLoad);
10956           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
10957         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
10959       }
10960     }
10961   }
10962 
10963   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
10964   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
10965   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
10966     return ZExtLoad;
10967 
10968   // Try to simplify (zext (zextload x)).
10969   if (SDValue foldedExt = tryToFoldExtOfExtload(
10970           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
10971     return foldedExt;
10972 
10973   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
10974     return V;
10975 
10976   if (N0.getOpcode() == ISD::SETCC) {
10977     // Only do this before legalize for now.
10978     if (!LegalOperations && VT.isVector() &&
10979         N0.getValueType().getVectorElementType() == MVT::i1) {
10980       EVT N00VT = N0.getOperand(0).getValueType();
10981       if (getSetCCResultType(N00VT) == N0.getValueType())
10982         return SDValue();
10983 
10984       // We know that the # elements of the results is the same as the #
10985       // elements of the compare (and the # elements of the compare result for
10986       // that matter). Check to see that they are the same size. If so, we know
10987       // that the element size of the sext'd result matches the element size of
10988       // the compare operands.
10989       SDLoc DL(N);
10990       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
10991         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
10992         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
10993                                      N0.getOperand(1), N0.getOperand(2));
10994         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
10995       }
10996 
10997       // If the desired elements are smaller or larger than the source
10998       // elements we can use a matching integer vector type and then
10999       // truncate/any extend followed by zext_in_reg.
11000       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11001       SDValue VsetCC =
11002           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
11003                       N0.getOperand(1), N0.getOperand(2));
11004       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
11005                                     N0.getValueType());
11006     }
11007 
    // zext(setcc x,y,cc) -> zext(select_cc x, y, true, false, cc)
11009     SDLoc DL(N);
11010     EVT N0VT = N0.getValueType();
11011     EVT N00VT = N0.getOperand(0).getValueType();
11012     if (SDValue SCC = SimplifySelectCC(
11013             DL, N0.getOperand(0), N0.getOperand(1),
11014             DAG.getBoolConstant(true, DL, N0VT, N00VT),
11015             DAG.getBoolConstant(false, DL, N0VT, N00VT),
11016             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11017       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
11018   }
11019 
  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
11021   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11022       isa<ConstantSDNode>(N0.getOperand(1)) &&
11023       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
11024       N0.hasOneUse()) {
11025     SDValue ShAmt = N0.getOperand(1);
11026     if (N0.getOpcode() == ISD::SHL) {
11027       SDValue InnerZExt = N0.getOperand(0);
11028       // If the original shl may be shifting out bits, do not perform this
11029       // transformation.
11030       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
11031         InnerZExt.getOperand(0).getValueSizeInBits();
11032       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
11033         return SDValue();
11034     }
11035 
11036     SDLoc DL(N);
11037 
11038     // Ensure that the shift amount is wide enough for the shifted value.
11039     if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
11040       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
11041 
11042     return DAG.getNode(N0.getOpcode(), DL, VT,
11043                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
11044                        ShAmt);
11045   }
11046 
11047   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11048     return NewVSel;
11049 
11050   if (SDValue NewCtPop = widenCtPop(N, DAG))
11051     return NewCtPop;
11052 
11053   return SDValue();
11054 }
11055 
11056 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
11057   SDValue N0 = N->getOperand(0);
11058   EVT VT = N->getValueType(0);
11059 
11060   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11061     return Res;
11062 
11063   // fold (aext (aext x)) -> (aext x)
11064   // fold (aext (zext x)) -> (zext x)
11065   // fold (aext (sext x)) -> (sext x)
11066   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
11067       N0.getOpcode() == ISD::ZERO_EXTEND ||
11068       N0.getOpcode() == ISD::SIGN_EXTEND)
11069     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
11070 
11071   // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
11073   if (N0.getOpcode() == ISD::TRUNCATE) {
11074     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
11075       SDNode *oye = N0.getOperand(0).getNode();
11076       if (NarrowLoad.getNode() != N0.getNode()) {
11077         CombineTo(N0.getNode(), NarrowLoad);
11078         // CombineTo deleted the truncate, if needed, but not what's under it.
11079         AddToWorklist(oye);
11080       }
11081       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11082     }
11083   }
11084 
  // fold (aext (truncate x)) -> (aext x), (truncate x) or x
11086   if (N0.getOpcode() == ISD::TRUNCATE)
11087     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
11088 
11089   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
11090   // if the trunc is not free.
11091   if (N0.getOpcode() == ISD::AND &&
11092       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
11093       N0.getOperand(1).getOpcode() == ISD::Constant &&
11094       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
11095                           N0.getValueType())) {
11096     SDLoc DL(N);
11097     SDValue X = N0.getOperand(0).getOperand(0);
11098     X = DAG.getAnyExtOrTrunc(X, DL, VT);
11099     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
11100     return DAG.getNode(ISD::AND, DL, VT,
11101                        X, DAG.getConstant(Mask, DL, VT));
11102   }
11103 
11104   // fold (aext (load x)) -> (aext (truncate (extload x)))
11105   // None of the supported targets knows how to perform load and any_ext
11106   // on vectors in one instruction, so attempt to fold to zext instead.
11107   if (VT.isVector()) {
11108     // Try to simplify (zext (load x)).
11109     if (SDValue foldedExt =
11110             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
11111                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
11112       return foldedExt;
11113   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
11114              ISD::isUNINDEXEDLoad(N0.getNode()) &&
11115              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
11116     bool DoXform = true;
11117     SmallVector<SDNode *, 4> SetCCs;
11118     if (!N0.hasOneUse())
11119       DoXform =
11120           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
11121     if (DoXform) {
11122       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11123       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
11124                                        LN0->getChain(), LN0->getBasePtr(),
11125                                        N0.getValueType(), LN0->getMemOperand());
11126       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
11127       // If the load value is used only by N, replace it via CombineTo N.
11128       bool NoReplaceTrunc = N0.hasOneUse();
11129       CombineTo(N, ExtLoad);
11130       if (NoReplaceTrunc) {
11131         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11132         recursivelyDeleteUnusedNodes(LN0);
11133       } else {
11134         SDValue Trunc =
11135             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
11136         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
11137       }
11138       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11139     }
11140   }
11141 
11142   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
11143   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
11144   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
11145   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
11146       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
11147     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11148     ISD::LoadExtType ExtType = LN0->getExtensionType();
11149     EVT MemVT = LN0->getMemoryVT();
11150     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
11151       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
11152                                        VT, LN0->getChain(), LN0->getBasePtr(),
11153                                        MemVT, LN0->getMemOperand());
11154       CombineTo(N, ExtLoad);
11155       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
11156       recursivelyDeleteUnusedNodes(LN0);
11157       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11158     }
11159   }
11160 
11161   if (N0.getOpcode() == ISD::SETCC) {
11162     // For vectors:
11163     // aext(setcc) -> vsetcc
11164     // aext(setcc) -> truncate(vsetcc)
11165     // aext(setcc) -> aext(vsetcc)
11166     // Only do this before legalize for now.
11167     if (VT.isVector() && !LegalOperations) {
11168       EVT N00VT = N0.getOperand(0).getValueType();
11169       if (getSetCCResultType(N00VT) == N0.getValueType())
11170         return SDValue();
11171 
11172       // We know that the # elements of the results is the same as the
11173       // # elements of the compare (and the # elements of the compare result
11174       // for that matter).  Check to see that they are the same size.  If so,
11175       // we know that the element size of the sext'd result matches the
11176       // element size of the compare operands.
11177       if (VT.getSizeInBits() == N00VT.getSizeInBits())
11178         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
11179                              N0.getOperand(1),
11180                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
11181 
11182       // If the desired elements are smaller or larger than the source
11183       // elements we can use a matching integer vector type and then
11184       // truncate/any extend
11185       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
11186       SDValue VsetCC =
11187         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
11188                       N0.getOperand(1),
11189                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
11190       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
11191     }
11192 
11193     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
11194     SDLoc DL(N);
11195     if (SDValue SCC = SimplifySelectCC(
11196             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
11197             DAG.getConstant(0, DL, VT),
11198             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
11199       return SCC;
11200   }
11201 
11202   if (SDValue NewCtPop = widenCtPop(N, DAG))
11203     return NewCtPop;
11204 
11205   return SDValue();
11206 }
11207 
11208 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
11209   unsigned Opcode = N->getOpcode();
11210   SDValue N0 = N->getOperand(0);
11211   SDValue N1 = N->getOperand(1);
11212   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
11213 
11214   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
11215   if (N0.getOpcode() == Opcode &&
11216       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
11217     return N0;
11218 
11219   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11220       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by applying the smaller of the two asserted types to the larger source
    // value. This eliminates the later assert:
11224     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
11225     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
11226     SDValue BigA = N0.getOperand(0);
11227     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11228     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11229            "Asserting zero/sign-extended bits to a type larger than the "
11230            "truncated destination does not provide information");
11231 
11232     SDLoc DL(N);
11233     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
11234     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
11235     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11236                                     BigA.getOperand(0), MinAssertVTVal);
11237     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11238   }
11239 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
11243   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
11244       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
11245       Opcode == ISD::AssertZext) {
11246     SDValue BigA = N0.getOperand(0);
11247     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
11248     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
11249            "Asserting zero/sign-extended bits to a type larger than the "
11250            "truncated destination does not provide information");
11251 
11252     if (AssertVT.bitsLT(BigA_AssertVT)) {
11253       SDLoc DL(N);
11254       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
11255                                       BigA.getOperand(0), N1);
11256       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
11257     }
11258   }
11259 
11260   return SDValue();
11261 }
11262 
11263 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
11264   SDLoc DL(N);
11265 
11266   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
11267   SDValue N0 = N->getOperand(0);
11268 
11269   // Fold (assertalign (assertalign x, AL0), AL1) ->
11270   // (assertalign x, max(AL0, AL1))
11271   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
11272     return DAG.getAssertAlign(DL, N0.getOperand(0),
11273                               std::max(AL, AAN->getAlign()));
11274 
11275   // In rare cases, there are trivial arithmetic ops in source operands. Sink
11276   // this assert down to source operands so that those arithmetic ops could be
11277   // exposed to the DAG combining.
11278   switch (N0.getOpcode()) {
11279   default:
11280     break;
11281   case ISD::ADD:
11282   case ISD::SUB: {
11283     unsigned AlignShift = Log2(AL);
11284     SDValue LHS = N0.getOperand(0);
11285     SDValue RHS = N0.getOperand(1);
11286     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
11287     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
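    // If one operand is already known to be at least AL-aligned, the assert on
    // the sum/difference forces the other operand to be AL-aligned too, so the
    // assertion can be sunk onto it.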
11288     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
11289       if (LHSAlignShift < AlignShift)
11290         LHS = DAG.getAssertAlign(DL, LHS, AL);
11291       if (RHSAlignShift < AlignShift)
11292         RHS = DAG.getAssertAlign(DL, RHS, AL);
11293       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
11294     }
11295     break;
11296   }
11297   }
11298 
11299   return SDValue();
11300 }
11301 
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the number of bits in the
/// narrower type, transform it to a narrower load from address + N / (number
/// of bits in the new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
11308 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
11309   unsigned Opc = N->getOpcode();
11310 
11311   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
11312   SDValue N0 = N->getOperand(0);
11313   EVT VT = N->getValueType(0);
11314   EVT ExtVT = VT;
11315 
11316   // This transformation isn't valid for vector loads.
11317   if (VT.isVector())
11318     return SDValue();
11319 
11320   unsigned ShAmt = 0;
11321   bool HasShiftedOffset = false;
11322   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
11323   // extended to VT.
11324   if (Opc == ISD::SIGN_EXTEND_INREG) {
11325     ExtType = ISD::SEXTLOAD;
11326     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
11327   } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value,
    // or it may be shifting a higher subword, half or byte into the lowest
    // bits.
11331     ExtType = ISD::ZEXTLOAD;
11332     N0 = SDValue(N, 0);
11333 
11334     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
11335     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11336     if (!N01 || !LN0)
11337       return SDValue();
11338 
11339     uint64_t ShiftAmt = N01->getZExtValue();
11340     uint64_t MemoryWidth = LN0->getMemoryVT().getScalarSizeInBits();
11341     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
11342       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
11343     else
11344       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
11345                                 VT.getScalarSizeInBits() - ShiftAmt);
11346   } else if (Opc == ISD::AND) {
11347     // An AND with a constant mask is the same as a truncate + zero-extend.
11348     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
11349     if (!AndC)
11350       return SDValue();
11351 
11352     const APInt &Mask = AndC->getAPIntValue();
11353     unsigned ActiveBits = 0;
11354     if (Mask.isMask()) {
11355       ActiveBits = Mask.countTrailingOnes();
11356     } else if (Mask.isShiftedMask()) {
11357       ShAmt = Mask.countTrailingZeros();
11358       APInt ShiftedMask = Mask.lshr(ShAmt);
11359       ActiveBits = ShiftedMask.countTrailingOnes();
11360       HasShiftedOffset = true;
11361     } else
11362       return SDValue();
11363 
11364     ExtType = ISD::ZEXTLOAD;
11365     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
11366   }
11367 
11368   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
11369     SDValue SRL = N0;
11370     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
11371       ShAmt = ConstShift->getZExtValue();
11372       unsigned EVTBits = ExtVT.getScalarSizeInBits();
      // Is the shift amount a multiple of the size of ExtVT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of the size of ExtVT?
        if ((N0.getScalarValueSizeInBits() & (EVTBits - 1)) != 0)
11378           return SDValue();
11379       }
11380 
11381       // At this point, we must have a load or else we can't do the transform.
11382       auto *LN0 = dyn_cast<LoadSDNode>(N0);
11383       if (!LN0) return SDValue();
11384 
11385       // Because a SRL must be assumed to *need* to zero-extend the high bits
11386       // (as opposed to anyext the high bits), we can't combine the zextload
11387       // lowering of SRL and an sextload.
11388       if (LN0->getExtensionType() == ISD::SEXTLOAD)
11389         return SDValue();
11390 
11391       // If the shift amount is larger than the input type then we're not
11392       // accessing any of the loaded bytes.  If the load was a zextload/extload
11393       // then the result of the shift+trunc is zero/undef (handled elsewhere).
11394       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
11395         return SDValue();
11396 
11397       // If the SRL is only used by a masking AND, we may be able to adjust
11398       // the ExtVT to make the AND redundant.
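      // N0 is known to have a single use (checked above), so that use is the
      // only candidate.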
11399       SDNode *Mask = *(SRL->use_begin());
11400       if (Mask->getOpcode() == ISD::AND &&
11401           isa<ConstantSDNode>(Mask->getOperand(1))) {
11402         const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
11403         if (ShiftMask.isMask()) {
11404           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
11405                                            ShiftMask.countTrailingOnes());
11406           // If the mask is smaller, recompute the type.
11407           if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
11408               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
11409             ExtVT = MaskedVT;
11410         }
11411       }
11412     }
11413   }
11414 
11415   // If the load is shifted left (and the result isn't shifted back right),
11416   // we can fold the truncate through the shift.
11417   unsigned ShLeftAmt = 0;
11418   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11419       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
11420     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
11421       ShLeftAmt = N01->getZExtValue();
11422       N0 = N0.getOperand(0);
11423     }
11424   }
11425 
11426   // If we haven't found a load, we can't narrow it.
11427   if (!isa<LoadSDNode>(N0))
11428     return SDValue();
11429 
11430   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11431   // Reducing the width of a volatile load is illegal.  For atomics, we may be
11432   // able to reduce the width provided we never widen again. (see D66309)
11433   if (!LN0->isSimple() ||
11434       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
11435     return SDValue();
11436 
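  // On big-endian targets the most significant bytes sit at the lower
  // addresses, so the byte offset into the loaded value must be computed from
  // the opposite end.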
11437   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
11438     unsigned LVTStoreBits =
11439         LN0->getMemoryVT().getStoreSizeInBits().getFixedSize();
11440     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize();
11441     return LVTStoreBits - EVTStoreBits - ShAmt;
11442   };
11443 
11444   // For big endian targets, we need to adjust the offset to the pointer to
11445   // load the correct bytes.
11446   if (DAG.getDataLayout().isBigEndian())
11447     ShAmt = AdjustBigEndianShift(ShAmt);
11448 
11449   uint64_t PtrOff = ShAmt / 8;
11450   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
11451   SDLoc DL(LN0);
11452   // The original load itself didn't wrap, so an offset within it doesn't.
11453   SDNodeFlags Flags;
11454   Flags.setNoUnsignedWrap(true);
11455   SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(),
11456                                             TypeSize::Fixed(PtrOff), DL, Flags);
11457   AddToWorklist(NewPtr.getNode());
11458 
11459   SDValue Load;
11460   if (ExtType == ISD::NON_EXTLOAD)
11461     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
11462                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11463                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
11464   else
11465     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
11466                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
11467                           NewAlign, LN0->getMemOperand()->getFlags(),
11468                           LN0->getAAInfo());
11469 
11470   // Replace the old load's chain with the new load's chain.
11471   WorklistRemover DeadNodes(*this);
11472   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11473 
11474   // Shift the result left, if we've swallowed a left shift.
11475   SDValue Result = Load;
11476   if (ShLeftAmt != 0) {
11477     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
11478     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
11479       ShImmTy = VT;
11480     // If the shift amount is as large as the result size (but, presumably,
11481     // no larger than the source) then the useful bits of the result are
11482     // zero; we can't simply return the shortened shift, because the result
11483     // of that operation is undefined.
11484     if (ShLeftAmt >= VT.getScalarSizeInBits())
11485       Result = DAG.getConstant(0, DL, VT);
11486     else
11487       Result = DAG.getNode(ISD::SHL, DL, VT,
11488                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
11489   }
11490 
11491   if (HasShiftedOffset) {
11492     // Recalculate the shift amount after it has been altered to calculate
11493     // the offset.
11494     if (DAG.getDataLayout().isBigEndian())
11495       ShAmt = AdjustBigEndianShift(ShAmt);
11496 
    // We're using a shifted mask, so the load now has an offset. This means
    // the data has been loaded into lower bytes than it would occupy without
    // the offset, so shift the loaded data left into the correct position in
    // the register.
11501     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
11502     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
11503     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
11504   }
11505 
11506   // Return the new loaded value.
11507   return Result;
11508 }
11509 
11510 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
11511   SDValue N0 = N->getOperand(0);
11512   SDValue N1 = N->getOperand(1);
11513   EVT VT = N->getValueType(0);
11514   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
11515   unsigned VTBits = VT.getScalarSizeInBits();
11516   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
11517 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
11519   if (N0.isUndef())
11520     return DAG.getConstant(0, SDLoc(N), VT);
11521 
11522   // fold (sext_in_reg c1) -> c1
11523   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
11524     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
11525 
11526   // If the input is already sign extended, just drop the extension.
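  // ComputeNumSignBits counts how many high bits are copies of the sign bit,
  // so the extension is redundant once that covers the VTBits - ExtVTBits bits
  // the sext_in_reg would overwrite plus the narrow sign bit itself.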
11527   if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
11528     return N0;
11529 
11530   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
11531   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
11532       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
11533     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
11534                        N1);
11535 
11536   // fold (sext_in_reg (sext x)) -> (sext x)
11537   // fold (sext_in_reg (aext x)) -> (sext x)
11538   // if x is small enough or if we know that x has more than 1 sign bit and the
11539   // sign_extend_inreg is extending from one of them.
11540   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
11541     SDValue N00 = N0.getOperand(0);
11542     unsigned N00Bits = N00.getScalarValueSizeInBits();
11543     if ((N00Bits <= ExtVTBits ||
11544          (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
11545         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
11546       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11547   }
11548 
11549   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
11550   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
11551        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
11552        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
11553       N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) {
11554     if (!LegalOperations ||
11555         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
11556       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
11557                          N0.getOperand(0));
11558   }
11559 
11560   // fold (sext_in_reg (zext x)) -> (sext x)
11561   // iff we are extending the source sign bit.
11562   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
11563     SDValue N00 = N0.getOperand(0);
11564     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
11565         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
11567   }
11568 
11569   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
11570   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
11571     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
11572 
11573   // fold operands of sext_in_reg based on knowledge that the top bits are not
11574   // demanded.
11575   if (SimplifyDemandedBits(SDValue(N, 0)))
11576     return SDValue(N, 0);
11577 
11578   // fold (sext_in_reg (load x)) -> (smaller sextload x)
11579   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
11580   if (SDValue NarrowLoad = ReduceLoadWidth(N))
11581     return NarrowLoad;
11582 
11583   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
11584   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
11585   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
11586   if (N0.getOpcode() == ISD::SRL) {
11587     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
11588       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
11589         // We can turn this into an SRA iff the input to the SRL is already sign
11590         // extended enough.
11591         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
11592         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
11593           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
11594                              N0.getOperand(1));
11595       }
11596   }
11597 
11598   // fold (sext_inreg (extload x)) -> (sextload x)
11599   // If sextload is not supported by target, we can only do the combine when
11600   // load has one use. Doing otherwise can block folding the extload with other
11601   // extends that the target does support.
11602   if (ISD::isEXTLoad(N0.getNode()) &&
11603       ISD::isUNINDEXEDLoad(N0.getNode()) &&
11604       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11605       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
11606         N0.hasOneUse()) ||
11607        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11608     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11609     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11610                                      LN0->getChain(),
11611                                      LN0->getBasePtr(), ExtVT,
11612                                      LN0->getMemOperand());
11613     CombineTo(N, ExtLoad);
11614     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11615     AddToWorklist(ExtLoad.getNode());
11616     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11617   }
11618   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
11619   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
11620       N0.hasOneUse() &&
11621       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
11622       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
11623        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
11624     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11625     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
11626                                      LN0->getChain(),
11627                                      LN0->getBasePtr(), ExtVT,
11628                                      LN0->getMemOperand());
11629     CombineTo(N, ExtLoad);
11630     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11631     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
11632   }
11633 
11634   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
11635   // ignore it if the masked load is already sign extended
11636   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
11637     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
11638         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
11639         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
11640       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
11641           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
11642           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
11643           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
11644       CombineTo(N, ExtMaskedLoad);
11645       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
11646       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11647     }
11648   }
11649 
11650   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
11651   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
11652     if (SDValue(GN0, 0).hasOneUse() &&
11653         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
11655       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
11656                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
11657 
11658       SDValue ExtLoad = DAG.getMaskedGather(
11659           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
11660           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
11661 
11662       CombineTo(N, ExtLoad);
11663       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
11664       AddToWorklist(ExtLoad.getNode());
11665       return SDValue(N, 0); // Return N so it doesn't get rechecked!
11666     }
11667   }
11668 
11669   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
11670   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
11671     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
11672                                            N0.getOperand(1), false))
11673       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
11674   }
11675 
11676   return SDValue();
11677 }
11678 
11679 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
11680   SDValue N0 = N->getOperand(0);
11681   EVT VT = N->getValueType(0);
11682 
  // sext_vector_inreg(undef) = 0 because the top bits will all be the same.
11684   if (N0.isUndef())
11685     return DAG.getConstant(0, SDLoc(N), VT);
11686 
11687   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11688     return Res;
11689 
11690   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11691     return SDValue(N, 0);
11692 
11693   return SDValue();
11694 }
11695 
11696 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
11697   SDValue N0 = N->getOperand(0);
11698   EVT VT = N->getValueType(0);
11699 
11700   // zext_vector_inreg(undef) = 0 because the top bits will be zero.
11701   if (N0.isUndef())
11702     return DAG.getConstant(0, SDLoc(N), VT);
11703 
11704   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
11705     return Res;
11706 
11707   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
11708     return SDValue(N, 0);
11709 
11710   return SDValue();
11711 }
11712 
11713 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
11714   SDValue N0 = N->getOperand(0);
11715   EVT VT = N->getValueType(0);
11716   EVT SrcVT = N0.getValueType();
11717   bool isLE = DAG.getDataLayout().isLittleEndian();
11718 
11719   // noop truncate
11720   if (SrcVT == VT)
11721     return N0;
11722 
11723   // fold (truncate (truncate x)) -> (truncate x)
11724   if (N0.getOpcode() == ISD::TRUNCATE)
11725     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11726 
11727   // fold (truncate c1) -> c1
11728   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
11729     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
11730     if (C.getNode() != N)
11731       return C;
11732   }
11733 
11734   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
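  // e.g. (i16 trunc (i64 zext i32:x)) -> (i16 trunc x), while
  //      (i16 trunc (i64 zext i8:x))  -> (i16 zext x).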
11735   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
11736       N0.getOpcode() == ISD::SIGN_EXTEND ||
11737       N0.getOpcode() == ISD::ANY_EXTEND) {
11738     // if the source is smaller than the dest, we still need an extend.
11739     if (N0.getOperand(0).getValueType().bitsLT(VT))
11740       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
11742     if (N0.getOperand(0).getValueType().bitsGT(VT))
11743       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
11744     // if the source and dest are the same type, we can drop both the extend
11745     // and the truncate.
11746     return N0.getOperand(0);
11747   }
11748 
11749   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
11750   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
11751     return SDValue();
11752 
11753   // Fold extract-and-trunc into a narrow extract. For example:
11754   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
11755   //   i32 y = TRUNCATE(i64 x)
11756   //        -- becomes --
11757   //   v16i8 b = BITCAST (v2i64 val)
11758   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
11759   //
11760   // Note: We only run this optimization after type legalization (which often
11761   // creates this pattern) and before operation legalization after which
11762   // we need to be more careful about the vector instructions that we generate.
11763   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
11764       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
11765     EVT VecTy = N0.getOperand(0).getValueType();
11766     EVT ExTy = N0.getValueType();
11767     EVT TrTy = N->getValueType(0);
11768 
11769     auto EltCnt = VecTy.getVectorElementCount();
11770     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
11771     auto NewEltCnt = EltCnt * SizeRatio;
11772 
11773     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
11774     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
11775 
11776     SDValue EltNo = N0->getOperand(1);
11777     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
11778       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11779       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
11780 
11781       SDLoc DL(N);
11782       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
11783                          DAG.getBitcast(NVT, N0.getOperand(0)),
11784                          DAG.getVectorIdxConstant(Index, DL));
11785     }
11786   }
11787 
11788   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
11789   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
11790     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
11791         TLI.isTruncateFree(SrcVT, VT)) {
11792       SDLoc SL(N0);
11793       SDValue Cond = N0.getOperand(0);
11794       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11795       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
11796       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
11797     }
11798   }
11799 
11800   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
11801   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
11802       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
11803       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
11804     SDValue Amt = N0.getOperand(1);
11805     KnownBits Known = DAG.computeKnownBits(Amt);
11806     unsigned Size = VT.getScalarSizeInBits();
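    // The shift amount is provably in range when its possibly-set bits fit in
    // Log2_32(Size) bits, i.e. Amt <= (1 << Log2_32(Size)) - 1 < Size.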
11807     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
11808       SDLoc SL(N);
11809       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
11810 
11811       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11812       if (AmtVT != Amt.getValueType()) {
11813         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
11814         AddToWorklist(Amt.getNode());
11815       }
11816       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
11817     }
11818   }
11819 
11820   // Attempt to pre-truncate BUILD_VECTOR sources.
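  // e.g. (v2i16 trunc (v2i32 build_vector a, b)) becomes
  // (v2i16 build_vector (i16 trunc a), (i16 trunc b)) when the scalar
  // truncates are free.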
11821   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
11822       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
11823       // Avoid creating illegal types if running after type legalizer.
11824       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
11825     SDLoc DL(N);
11826     EVT SVT = VT.getScalarType();
11827     SmallVector<SDValue, 8> TruncOps;
11828     for (const SDValue &Op : N0->op_values()) {
11829       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
11830       TruncOps.push_back(TruncOp);
11831     }
11832     return DAG.getBuildVector(VT, DL, TruncOps);
11833   }
11834 
11835   // Fold a series of buildvector, bitcast, and truncate if possible.
11836   // For example fold
11837   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
11838   //   (2xi32 (buildvector x, y)).
11839   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
11840       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
11841       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
11842       N0.getOperand(0).hasOneUse()) {
11843     SDValue BuildVect = N0.getOperand(0);
11844     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
11845     EVT TruncVecEltTy = VT.getVectorElementType();
11846 
11847     // Check that the element types match.
11848     if (BuildVectEltTy == TruncVecEltTy) {
11849       // Now we only need to compute the offset of the truncated elements.
11850       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
11851       unsigned TruncVecNumElts = VT.getVectorNumElements();
11852       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
11853 
11854       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
11855              "Invalid number of elements");
11856 
11857       SmallVector<SDValue, 8> Opnds;
11858       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
11859         Opnds.push_back(BuildVect.getOperand(i));
11860 
11861       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
11862     }
11863   }
11864 
11865   // See if we can simplify the input to this truncate through knowledge that
11866   // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
11868   // Currently we only perform this optimization on scalars because vectors
11869   // may have different active low bits.
11870   if (!VT.isVector()) {
11871     APInt Mask =
11872         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
11873     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
11874       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
11875   }
11876 
11877   // fold (truncate (load x)) -> (smaller load x)
11878   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
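  // e.g. if only the low 16 bits of an i32 load are demanded, ReduceLoadWidth
  // can emit an i16 load from an endian-adjusted offset of the same address.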
11879   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
11880     if (SDValue Reduced = ReduceLoadWidth(N))
11881       return Reduced;
11882 
11883     // Handle the case where the load remains an extending load even
11884     // after truncation.
11885     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
11886       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11887       if (LN0->isSimple() && LN0->getMemoryVT().bitsLT(VT)) {
11888         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
11889                                          VT, LN0->getChain(), LN0->getBasePtr(),
11890                                          LN0->getMemoryVT(),
11891                                          LN0->getMemOperand());
11892         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
11893         return NewLoad;
11894       }
11895     }
11896   }
11897 
11898   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
11899   // where ... are all 'undef'.
11900   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
11901     SmallVector<EVT, 8> VTs;
11902     SDValue V;
11903     unsigned Idx = 0;
11904     unsigned NumDefs = 0;
11905 
11906     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
11907       SDValue X = N0.getOperand(i);
11908       if (!X.isUndef()) {
11909         V = X;
11910         Idx = i;
11911         NumDefs++;
11912       }
      // Stop if more than one member is non-undef.
11914       if (NumDefs > 1)
11915         break;
11916 
11917       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
11918                                      VT.getVectorElementType(),
11919                                      X.getValueType().getVectorElementCount()));
11920     }
11921 
11922     if (NumDefs == 0)
11923       return DAG.getUNDEF(VT);
11924 
11925     if (NumDefs == 1) {
11926       assert(V.getNode() && "The single defined operand is empty!");
11927       SmallVector<SDValue, 8> Opnds;
11928       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
11929         if (i != Idx) {
11930           Opnds.push_back(DAG.getUNDEF(VTs[i]));
11931           continue;
11932         }
11933         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
11934         AddToWorklist(NV.getNode());
11935         Opnds.push_back(NV);
11936       }
11937       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
11938     }
11939   }
11940 
11941   // Fold truncate of a bitcast of a vector to an extract of the low vector
11942   // element.
11943   //
11944   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
11945   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
11946     SDValue VecSrc = N0.getOperand(0);
11947     EVT VecSrcVT = VecSrc.getValueType();
11948     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
11949         (!LegalOperations ||
11950          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
11951       SDLoc SL(N);
11952 
11953       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
11954       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
11955                          DAG.getVectorIdxConstant(Idx, SL));
11956     }
11957   }
11958 
11959   // Simplify the operands using demanded-bits information.
11960   if (!VT.isVector() &&
11961       SimplifyDemandedBits(SDValue(N, 0)))
11962     return SDValue(N, 0);
11963 
11964   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
11965   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
11966   // When the adde's carry is not used.
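  // This is safe because the low bits of the sum depend only on the low bits
  // of the operands and the incoming carry, so truncating the inputs first
  // yields the same truncated result.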
11967   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
11968       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
      // We only do this for ADDCARRY before operation legalization.
11970       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
11971        TLI.isOperationLegal(N0.getOpcode(), VT))) {
11972     SDLoc SL(N);
11973     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
11974     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
11975     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
11976     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
11977   }
11978 
11979   // fold (truncate (extract_subvector(ext x))) ->
11980   //      (extract_subvector x)
11981   // TODO: This can be generalized to cover cases where the truncate and extract
11982   // do not fully cancel each other out.
11983   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
11984     SDValue N00 = N0.getOperand(0);
11985     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
11986         N00.getOpcode() == ISD::ZERO_EXTEND ||
11987         N00.getOpcode() == ISD::ANY_EXTEND) {
11988       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
11989           VT.getVectorElementType())
11990         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
11991                            N00.getOperand(0), N0.getOperand(1));
11992     }
11993   }
11994 
11995   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
11996     return NewVSel;
11997 
11998   // Narrow a suitable binary operation with a non-opaque constant operand by
11999   // moving it ahead of the truncate. This is limited to pre-legalization
12000   // because targets may prefer a wider type during later combines and invert
12001   // this transform.
12002   switch (N0.getOpcode()) {
12003   case ISD::ADD:
12004   case ISD::SUB:
12005   case ISD::MUL:
12006   case ISD::AND:
12007   case ISD::OR:
12008   case ISD::XOR:
12009     if (!LegalOperations && N0.hasOneUse() &&
12010         (isConstantOrConstantVector(N0.getOperand(0), true) ||
12011          isConstantOrConstantVector(N0.getOperand(1), true))) {
12012       // TODO: We already restricted this to pre-legalization, but for vectors
12013       // we are extra cautious to not create an unsupported operation.
12014       // Target-specific changes are likely needed to avoid regressions here.
12015       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
12016         SDLoc DL(N);
12017         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
12018         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
12019         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
12020       }
12021     }
12022   }
12023 
12024   return SDValue();
12025 }
12026 
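/// Return operand \p i of the BUILD_PAIR \p N as a node, looking through a
/// MERGE_VALUES wrapper to the value it forwards.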
12027 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
12028   SDValue Elt = N->getOperand(i);
12029   if (Elt.getOpcode() != ISD::MERGE_VALUES)
12030     return Elt.getNode();
12031   return Elt.getOperand(Elt.getResNo()).getNode();
12032 }
12033 
12034 /// build_pair (load, load) -> load
12035 /// if load locations are consecutive.
12036 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
12037   assert(N->getOpcode() == ISD::BUILD_PAIR);
12038 
12039   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
12040   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
12041 
  // A BUILD_PAIR always has the least significant part in elt 0 and the
12043   // most significant part in elt 1. So when combining into one large load, we
12044   // need to consider the endianness.
12045   if (DAG.getDataLayout().isBigEndian())
12046     std::swap(LD1, LD2);
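  // After this swap, LD1 is always the load expected at the lower address,
  // so the consecutive-load check below only has to test one ordering.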
12047 
12048   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
12049       LD1->getAddressSpace() != LD2->getAddressSpace())
12050     return SDValue();
12051   EVT LD1VT = LD1->getValueType(0);
12052   unsigned LD1Bytes = LD1VT.getStoreSize();
12053   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
12054       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
12055     Align Alignment = LD1->getAlign();
12056     Align NewAlign = DAG.getDataLayout().getABITypeAlign(
12057         VT.getTypeForEVT(*DAG.getContext()));
12058 
12059     if (NewAlign <= Alignment &&
12060         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
12061       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
12062                          LD1->getPointerInfo(), Alignment);
12063   }
12064 
12065   return SDValue();
12066 }
12067 
12068 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
12069   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
12070   // and Lo parts; on big-endian machines it doesn't.
12071   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
12072 }
12073 
12074 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
12075                                     const TargetLowering &TLI) {
12076   // If this is not a bitcast to an FP type or if the target doesn't have
12077   // IEEE754-compliant FP logic, we're done.
12078   EVT VT = N->getValueType(0);
12079   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
12080     return SDValue();
12081 
12082   // TODO: Handle cases where the integer constant is a different scalar
12083   // bitwidth to the FP.
12084   SDValue N0 = N->getOperand(0);
12085   EVT SourceVT = N0.getValueType();
12086   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
12087     return SDValue();
12088 
12089   unsigned FPOpcode;
12090   APInt SignMask;
12091   switch (N0.getOpcode()) {
12092   case ISD::AND:
12093     FPOpcode = ISD::FABS;
12094     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
12095     break;
12096   case ISD::XOR:
12097     FPOpcode = ISD::FNEG;
12098     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12099     break;
12100   case ISD::OR:
12101     FPOpcode = ISD::FABS;
12102     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
12103     break;
12104   default:
12105     return SDValue();
12106   }
12107 
12108   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
12109   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
12110   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
12111   //   fneg (fabs X)
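  // For f32, for instance, the AND mask is 0x7fffffff and the XOR/OR mask is
  // 0x80000000.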
12112   SDValue LogicOp0 = N0.getOperand(0);
12113   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
12114   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
12115       LogicOp0.getOpcode() == ISD::BITCAST &&
12116       LogicOp0.getOperand(0).getValueType() == VT) {
12117     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
12118     NumFPLogicOpsConv++;
12119     if (N0.getOpcode() == ISD::OR)
12120       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
12121     return FPOp;
12122   }
12123 
12124   return SDValue();
12125 }
12126 
12127 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
12128   SDValue N0 = N->getOperand(0);
12129   EVT VT = N->getValueType(0);
12130 
12131   if (N0.isUndef())
12132     return DAG.getUNDEF(VT);
12133 
12134   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
12135   // Only do this before legalize types, unless both types are integer and the
12136   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
12138   // First check to see if this is all constant.
12139   // TODO: Support FP bitcasts after legalize types.
12140   if (VT.isVector() &&
12141       (!LegalTypes ||
12142        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
12143         TLI.isTypeLegal(VT.getVectorElementType()))) &&
12144       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
12145       cast<BuildVectorSDNode>(N0)->isConstant())
12146     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
12147                                              VT.getVectorElementType());
12148 
12149   // If the input is a constant, let getNode fold it.
12150   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
12151     // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
12154     if (!LegalOperations ||
12155         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
12156          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
12157         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
12158          TLI.isOperationLegal(ISD::Constant, VT))) {
12159       SDValue C = DAG.getBitcast(VT, N0);
12160       if (C.getNode() != N)
12161         return C;
12162     }
12163   }
12164 
12165   // (conv (conv x, t1), t2) -> (conv x, t2)
12166   if (N0.getOpcode() == ISD::BITCAST)
12167     return DAG.getBitcast(VT, N0.getOperand(0));
12168 
12169   // fold (conv (load x)) -> (load (conv*)x)
  // Only do this if the resultant load doesn't need a higher alignment than
  // the original.
12171   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12172       // Do not remove the cast if the types differ in endian layout.
12173       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
12174           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
12175       // If the load is volatile, we only want to change the load type if the
12176       // resulting load is legal. Otherwise we might increase the number of
12177       // memory accesses. We don't care if the original type was legal or not
12178       // as we assume software couldn't rely on the number of accesses of an
12179       // illegal type.
12180       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
12181        TLI.isOperationLegal(ISD::LOAD, VT))) {
12182     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12183 
12184     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
12185                                     *LN0->getMemOperand())) {
12186       SDValue Load =
12187           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12188                       LN0->getPointerInfo(), LN0->getAlign(),
12189                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12190       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
12191       return Load;
12192     }
12193   }
12194 
12195   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
12196     return V;
12197 
12198   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
12199   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
12200   //
12201   // For ppc_fp128:
12202   // fold (bitcast (fneg x)) ->
12203   //     flipbit = signbit
12204   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12205   //
12206   // fold (bitcast (fabs x)) ->
12207   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
12208   //     (xor (bitcast x) (build_pair flipbit, flipbit))
12209   // This often reduces constant pool loads.
12210   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
12211        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
12212       N0.getNode()->hasOneUse() && VT.isInteger() &&
12213       !VT.isVector() && !N0.getValueType().isVector()) {
12214     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
12215     AddToWorklist(NewConv.getNode());
12216 
12217     SDLoc DL(N);
12218     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12219       assert(VT.getSizeInBits() == 128);
12220       SDValue SignBit = DAG.getConstant(
12221           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
12222       SDValue FlipBit;
12223       if (N0.getOpcode() == ISD::FNEG) {
12224         FlipBit = SignBit;
12225         AddToWorklist(FlipBit.getNode());
12226       } else {
12227         assert(N0.getOpcode() == ISD::FABS);
12228         SDValue Hi =
12229             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
12230                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12231                                               SDLoc(NewConv)));
12232         AddToWorklist(Hi.getNode());
12233         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
12234         AddToWorklist(FlipBit.getNode());
12235       }
12236       SDValue FlipBits =
12237           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12238       AddToWorklist(FlipBits.getNode());
12239       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
12240     }
12241     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12242     if (N0.getOpcode() == ISD::FNEG)
12243       return DAG.getNode(ISD::XOR, DL, VT,
12244                          NewConv, DAG.getConstant(SignBit, DL, VT));
12245     assert(N0.getOpcode() == ISD::FABS);
12246     return DAG.getNode(ISD::AND, DL, VT,
12247                        NewConv, DAG.getConstant(~SignBit, DL, VT));
12248   }
12249 
12250   // fold (bitconvert (fcopysign cst, x)) ->
12251   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
12252   // Note that we don't handle (copysign x, cst) because this can always be
12253   // folded to an fneg or fabs.
12254   //
12255   // For ppc_fp128:
12256   // fold (bitcast (fcopysign cst, x)) ->
12257   //     flipbit = (and (extract_element
12258   //                     (xor (bitcast cst), (bitcast x)), 0),
12259   //                    signbit)
12260   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
12261   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
12262       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
12263       VT.isInteger() && !VT.isVector()) {
12264     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
12265     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
12266     if (isTypeLegal(IntXVT)) {
12267       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
12268       AddToWorklist(X.getNode());
12269 
12270       // If X has a different width than the result/lhs, sext it or truncate it.
12271       unsigned VTWidth = VT.getSizeInBits();
12272       if (OrigXWidth < VTWidth) {
12273         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
12274         AddToWorklist(X.getNode());
12275       } else if (OrigXWidth > VTWidth) {
12276         // To get the sign bit in the right place, we have to shift it right
12277         // before truncating.
12278         SDLoc DL(X);
12279         X = DAG.getNode(ISD::SRL, DL,
12280                         X.getValueType(), X,
12281                         DAG.getConstant(OrigXWidth-VTWidth, DL,
12282                                         X.getValueType()));
12283         AddToWorklist(X.getNode());
12284         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
12285         AddToWorklist(X.getNode());
12286       }
12287 
12288       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
12289         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
12290         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12291         AddToWorklist(Cst.getNode());
12292         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
12293         AddToWorklist(X.getNode());
12294         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
12295         AddToWorklist(XorResult.getNode());
12296         SDValue XorResult64 = DAG.getNode(
12297             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
12298             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
12299                                   SDLoc(XorResult)));
12300         AddToWorklist(XorResult64.getNode());
12301         SDValue FlipBit =
12302             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
12303                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
12304         AddToWorklist(FlipBit.getNode());
12305         SDValue FlipBits =
12306             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
12307         AddToWorklist(FlipBits.getNode());
12308         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
12309       }
12310       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
12311       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
12312                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
12313       AddToWorklist(X.getNode());
12314 
12315       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
12316       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
12317                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
12318       AddToWorklist(Cst.getNode());
12319 
12320       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
12321     }
12322   }
12323 
12324   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
12325   if (N0.getOpcode() == ISD::BUILD_PAIR)
12326     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
12327       return CombineLD;
12328 
12329   // Remove double bitcasts from shuffles - this is often a legacy of
12330   // XformToShuffleWithZero being used to combine bitmaskings (of
12331   // float vectors bitcast to integer vectors) into shuffles.
12332   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
12333   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
12334       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
12335       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
12336       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
12337     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
12338 
12339     // If operands are a bitcast, peek through if it casts the original VT.
12340     // If operands are a constant, just bitcast back to original VT.
12341     auto PeekThroughBitcast = [&](SDValue Op) {
12342       if (Op.getOpcode() == ISD::BITCAST &&
12343           Op.getOperand(0).getValueType() == VT)
12344         return SDValue(Op.getOperand(0));
12345       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
12346           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
12347         return DAG.getBitcast(VT, Op);
12348       return SDValue();
12349     };
12350 
12351     // FIXME: If either input vector is bitcast, try to convert the shuffle to
12352     // the result type of this bitcast. This would eliminate at least one
12353     // bitcast. See the transform in InstCombine.
12354     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
12355     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
12356     if (!(SV0 && SV1))
12357       return SDValue();
12358 
12359     int MaskScale =
12360         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
12361     SmallVector<int, 8> NewMask;
12362     for (int M : SVN->getMask())
12363       for (int i = 0; i != MaskScale; ++i)
12364         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
12365 
12366     SDValue LegalShuffle =
12367         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
12368     if (LegalShuffle)
12369       return LegalShuffle;
12370   }
12371 
12372   return SDValue();
12373 }
12374 
12375 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
12376   EVT VT = N->getValueType(0);
12377   return CombineConsecutiveLoads(N, VT);
12378 }
12379 
12380 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
12381   SDValue N0 = N->getOperand(0);
12382 
12383   // (freeze (freeze x)) -> (freeze x)
12384   if (N0.getOpcode() == ISD::FREEZE)
12385     return N0;
12386 
12387   // If the input is a constant, return it.
12388   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0))
12389     return N0;
12390 
12391   return SDValue();
12392 }
12393 
12394 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
12395 /// operands. DstEltVT indicates the destination element value type.
12396 SDValue DAGCombiner::
12397 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
12398   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
12399 
12400   // If this is already the right type, we're done.
12401   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
12402 
12403   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
12404   unsigned DstBitSize = DstEltVT.getSizeInBits();
12405 
12406   // If this is a conversion of N elements of one type to N elements of another
12407   // type, convert each element.  This handles FP<->INT cases.
12408   if (SrcBitSize == DstBitSize) {
12409     SmallVector<SDValue, 8> Ops;
12410     for (SDValue Op : BV->op_values()) {
12411       // If the vector element type is not legal, the BUILD_VECTOR operands
12412       // are promoted and implicitly truncated.  Make that explicit here.
12413       if (Op.getValueType() != SrcEltVT)
12414         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
12415       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
12416       AddToWorklist(Ops.back().getNode());
12417     }
12418     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12419                               BV->getValueType(0).getVectorNumElements());
12420     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
12421   }
12422 
12423   // Otherwise, we're growing or shrinking the elements.  To avoid having to
12424   // handle annoying details of growing/shrinking FP values, we convert them to
12425   // int first.
12426   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector where the elements are
    // the same size.
12429     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
12430     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
12431     SrcEltVT = IntVT;
12432   }
12433 
12434   // Now we know the input is an integer vector.  If the output is a FP type,
12435   // convert to integer first, then to FP of the right size.
12436   if (DstEltVT.isFloatingPoint()) {
12437     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
12438     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
12439 
12440     // Next, convert to FP elements of the same size.
12441     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
12442   }
12443 
12444   SDLoc DL(BV);
12445 
  // Okay, we know the src/dst types are both integers of differing sizes.
  // Handle growing first.
12448   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
12449   if (SrcBitSize < DstBitSize) {
12450     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
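    // On little-endian targets the lower-indexed source elements land in the
    // low bits of each output element (big-endian is the reverse), which is
    // why the operand index below is flipped for little-endian.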
12451 
12452     SmallVector<SDValue, 8> Ops;
12453     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
12454          i += NumInputsPerOutput) {
12455       bool isLE = DAG.getDataLayout().isLittleEndian();
12456       APInt NewBits = APInt(DstBitSize, 0);
12457       bool EltIsUndef = true;
12458       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
12459         // Shift the previously computed bits over.
12460         NewBits <<= SrcBitSize;
        SDValue Op = BV->getOperand(i + (isLE ? (NumInputsPerOutput-j-1) : j));
12462         if (Op.isUndef()) continue;
12463         EltIsUndef = false;
12464 
12465         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
12466                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
12467       }
12468 
12469       if (EltIsUndef)
12470         Ops.push_back(DAG.getUNDEF(DstEltVT));
12471       else
12472         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
12473     }
12474 
12475     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
12476     return DAG.getBuildVector(VT, DL, Ops);
12477   }
12478 
12479   // Finally, this must be the case where we are shrinking elements: each input
12480   // turns into multiple outputs.
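  // e.g. bitcasting a v2i32 constant vector to v8i8 emits four i8 pieces per
  // input element, least significant piece first; each group is then reversed
  // below for big-endian targets.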
12481   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
12482   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
12483                             NumOutputsPerInput*BV->getNumOperands());
12484   SmallVector<SDValue, 8> Ops;
12485 
12486   for (const SDValue &Op : BV->op_values()) {
12487     if (Op.isUndef()) {
12488       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
12489       continue;
12490     }
12491 
12492     APInt OpVal = cast<ConstantSDNode>(Op)->
12493                   getAPIntValue().zextOrTrunc(SrcBitSize);
12494 
12495     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
12496       APInt ThisVal = OpVal.trunc(DstBitSize);
12497       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
12498       OpVal.lshrInPlace(DstBitSize);
12499     }
12500 
12501     // For big endian targets, swap the order of the pieces of each element.
12502     if (DAG.getDataLayout().isBigEndian())
12503       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
12504   }
12505 
12506   return DAG.getBuildVector(VT, DL, Ops);
12507 }
12508 
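/// Return true if \p N's flags permit it to be contracted into a fused
/// multiply-add (either the contract or the reassoc fast-math flag is set).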
12509 static bool isContractable(SDNode *N) {
12510   SDNodeFlags F = N->getFlags();
12511   return F.hasAllowContract() || F.hasAllowReassociation();
12512 }
12513 
12514 /// Try to perform FMA combining on a given FADD node.
12515 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
12516   SDValue N0 = N->getOperand(0);
12517   SDValue N1 = N->getOperand(1);
12518   EVT VT = N->getValueType(0);
12519   SDLoc SL(N);
12520 
12521   const TargetOptions &Options = DAG.getTarget().Options;
12522 
12523   // Floating-point multiply-add with intermediate rounding.
12524   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12525 
12526   // Floating-point multiply-add without intermediate rounding.
12527   bool HasFMA =
12528       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12529       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12530 
12531   // No valid opcode, do not combine.
12532   if (!HasFMAD && !HasFMA)
12533     return SDValue();
12534 
12535   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12536   bool CanReassociate =
12537       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
12538   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12539                               CanFuse || HasFMAD);
12540   // If the addition is not contractable, do not combine.
12541   if (!AllowFusionGlobally && !isContractable(N))
12542     return SDValue();
12543 
12544   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12545     return SDValue();
12546 
12547   // Always prefer FMAD to FMA for precision.
12548   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12549   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12550 
12551   // Is the node an FMUL and contractable either due to global flags or
12552   // SDNodeFlags.
12553   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12554     if (N.getOpcode() != ISD::FMUL)
12555       return false;
12556     return AllowFusionGlobally || isContractable(N.getNode());
12557   };
12558   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
12559   // prefer to fold the multiply with fewer uses.
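  // Fusing the multiply with more uses would still leave it alive for its
  // other users, so eliminating the less-used multiply saves more.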
12560   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
12561     if (N0.getNode()->use_size() > N1.getNode()->use_size())
12562       std::swap(N0, N1);
12563   }
12564 
12565   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
12566   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
12567     return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12568                        N0.getOperand(1), N1);
12569   }
12570 
12571   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
12572   // Note: Commutes FADD operands.
12573   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
12574     return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
12575                        N1.getOperand(1), N0);
12576   }
12577 
12578   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
12579   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
12580   // This requires reassociation because it changes the order of operations.
12581   SDValue FMA, E;
12582   if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
12583       N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
12584       N0.getOperand(2).hasOneUse()) {
12585     FMA = N0;
12586     E = N1;
12587   } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
12588              N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
12589              N1.getOperand(2).hasOneUse()) {
12590     FMA = N1;
12591     E = N0;
12592   }
12593   if (FMA && E) {
12594     SDValue A = FMA.getOperand(0);
12595     SDValue B = FMA.getOperand(1);
12596     SDValue C = FMA.getOperand(2).getOperand(0);
12597     SDValue D = FMA.getOperand(2).getOperand(1);
12598     SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
12599     return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE);
12600   }
12601 
12602   // Look through FP_EXTEND nodes to do more combining.
12603 
12604   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
12605   if (N0.getOpcode() == ISD::FP_EXTEND) {
12606     SDValue N00 = N0.getOperand(0);
12607     if (isContractableFMUL(N00) &&
12608         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12609                             N00.getValueType())) {
12610       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12611                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12612                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12613                          N1);
12614     }
12615   }
12616 
12617   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
12618   // Note: Commutes FADD operands.
12619   if (N1.getOpcode() == ISD::FP_EXTEND) {
12620     SDValue N10 = N1.getOperand(0);
12621     if (isContractableFMUL(N10) &&
12622         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12623                             N10.getValueType())) {
12624       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12625                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
12626                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
12627                          N0);
12628     }
12629   }
12630 
12631   // More folding opportunities when target permits.
12632   if (Aggressive) {
12633     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
12634     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
12635     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12636                                     SDValue Z) {
12637       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
12638                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12639                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12640                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
12641                                      Z));
12642     };
12643     if (N0.getOpcode() == PreferredFusedOpcode) {
12644       SDValue N02 = N0.getOperand(2);
12645       if (N02.getOpcode() == ISD::FP_EXTEND) {
12646         SDValue N020 = N02.getOperand(0);
12647         if (isContractableFMUL(N020) &&
12648             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12649                                 N020.getValueType())) {
12650           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
12651                                       N020.getOperand(0), N020.getOperand(1),
12652                                       N1);
12653         }
12654       }
12655     }
12656 
12657     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
12658     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
12659     // FIXME: This turns two single-precision and one double-precision
12660     // operation into two double-precision operations, which might not be
12661     // interesting for all targets, especially GPUs.
12662     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
12663                                     SDValue Z) {
12664       return DAG.getNode(
12665           PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
12666           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
12667           DAG.getNode(PreferredFusedOpcode, SL, VT,
12668                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
12669                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
12670     };
12671     if (N0.getOpcode() == ISD::FP_EXTEND) {
12672       SDValue N00 = N0.getOperand(0);
12673       if (N00.getOpcode() == PreferredFusedOpcode) {
12674         SDValue N002 = N00.getOperand(2);
12675         if (isContractableFMUL(N002) &&
12676             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12677                                 N00.getValueType())) {
12678           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
12679                                       N002.getOperand(0), N002.getOperand(1),
12680                                       N1);
12681         }
12682       }
12683     }
12684 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
12686     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
12687     if (N1.getOpcode() == PreferredFusedOpcode) {
12688       SDValue N12 = N1.getOperand(2);
12689       if (N12.getOpcode() == ISD::FP_EXTEND) {
12690         SDValue N120 = N12.getOperand(0);
12691         if (isContractableFMUL(N120) &&
12692             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12693                                 N120.getValueType())) {
12694           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
12695                                       N120.getOperand(0), N120.getOperand(1),
12696                                       N0);
12697         }
12698       }
12699     }
12700 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
12702     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
12703     // FIXME: This turns two single-precision and one double-precision
12704     // operation into two double-precision operations, which might not be
12705     // interesting for all targets, especially GPUs.
12706     if (N1.getOpcode() == ISD::FP_EXTEND) {
12707       SDValue N10 = N1.getOperand(0);
12708       if (N10.getOpcode() == PreferredFusedOpcode) {
12709         SDValue N102 = N10.getOperand(2);
12710         if (isContractableFMUL(N102) &&
12711             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12712                                 N10.getValueType())) {
12713           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
12714                                       N102.getOperand(0), N102.getOperand(1),
12715                                       N0);
12716         }
12717       }
12718     }
12719   }
12720 
12721   return SDValue();
12722 }
12723 
12724 /// Try to perform FMA combining on a given FSUB node.
12725 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
12726   SDValue N0 = N->getOperand(0);
12727   SDValue N1 = N->getOperand(1);
12728   EVT VT = N->getValueType(0);
12729   SDLoc SL(N);
12730 
12731   const TargetOptions &Options = DAG.getTarget().Options;
12732   // Floating-point multiply-add with intermediate rounding.
12733   bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
12734 
12735   // Floating-point multiply-add without intermediate rounding.
12736   bool HasFMA =
12737       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
12738       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
12739 
12740   // No valid opcode, do not combine.
12741   if (!HasFMAD && !HasFMA)
12742     return SDValue();
12743 
12744   const SDNodeFlags Flags = N->getFlags();
12745   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
12746   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
12747                               CanFuse || HasFMAD);
12748 
12749   // If the subtraction is not contractable, do not combine.
12750   if (!AllowFusionGlobally && !isContractable(N))
12751     return SDValue();
12752 
12753   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
12754     return SDValue();
12755 
12756   // Always prefer FMAD to FMA for precision.
12757   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
12758   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
12759   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
12760 
12761   // Is the node an FMUL and contractable either due to global flags or
12762   // SDNodeFlags.
12763   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
12764     if (N.getOpcode() != ISD::FMUL)
12765       return false;
12766     return AllowFusionGlobally || isContractable(N.getNode());
12767   };
12768 
12769   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12770   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
12771     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
12772       return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
12773                          XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
12774     }
12775     return SDValue();
12776   };
12777 
12778   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12779   // Note: Commutes FSUB operands.
12780   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
12781     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
12782       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12783                          DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
12784                          YZ.getOperand(1), X);
12785     }
12786     return SDValue();
12787   };
12788 
12789   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
12790   // prefer to fold the multiply with fewer uses.
12791   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
12792       (N0.getNode()->use_size() > N1.getNode()->use_size())) {
12793     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
12794     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12795       return V;
12796     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
12797     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12798       return V;
12799   } else {
12800     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12801     if (SDValue V = tryToFoldXYSubZ(N0, N1))
12802       return V;
12803     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
12804     if (SDValue V = tryToFoldXSubYZ(N0, N1))
12805       return V;
12806   }
12807 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
12809   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
12810       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
12811     SDValue N00 = N0.getOperand(0).getOperand(0);
12812     SDValue N01 = N0.getOperand(0).getOperand(1);
12813     return DAG.getNode(PreferredFusedOpcode, SL, VT,
12814                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
12815                        DAG.getNode(ISD::FNEG, SL, VT, N1));
12816   }
12817 
12818   // Look through FP_EXTEND nodes to do more combining.
12819 
12820   // fold (fsub (fpext (fmul x, y)), z)
12821   //   -> (fma (fpext x), (fpext y), (fneg z))
12822   if (N0.getOpcode() == ISD::FP_EXTEND) {
12823     SDValue N00 = N0.getOperand(0);
12824     if (isContractableFMUL(N00) &&
12825         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12826                             N00.getValueType())) {
12827       return DAG.getNode(PreferredFusedOpcode, SL, VT,
12828                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12829                          DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12830                          DAG.getNode(ISD::FNEG, SL, VT, N1));
12831     }
12832   }
12833 
12834   // fold (fsub x, (fpext (fmul y, z)))
12835   //   -> (fma (fneg (fpext y)), (fpext z), x)
12836   // Note: Commutes FSUB operands.
12837   if (N1.getOpcode() == ISD::FP_EXTEND) {
12838     SDValue N10 = N1.getOperand(0);
12839     if (isContractableFMUL(N10) &&
12840         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12841                             N10.getValueType())) {
12842       return DAG.getNode(
12843           PreferredFusedOpcode, SL, VT,
12844           DAG.getNode(ISD::FNEG, SL, VT,
12845                       DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
12846           DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
12847     }
12848   }
12849 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
12856   if (N0.getOpcode() == ISD::FP_EXTEND) {
12857     SDValue N00 = N0.getOperand(0);
12858     if (N00.getOpcode() == ISD::FNEG) {
12859       SDValue N000 = N00.getOperand(0);
12860       if (isContractableFMUL(N000) &&
12861           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12862                               N00.getValueType())) {
12863         return DAG.getNode(
12864             ISD::FNEG, SL, VT,
12865             DAG.getNode(PreferredFusedOpcode, SL, VT,
12866                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
12867                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
12868                         N1));
12869       }
12870     }
12871   }
12872 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
12879   if (N0.getOpcode() == ISD::FNEG) {
12880     SDValue N00 = N0.getOperand(0);
12881     if (N00.getOpcode() == ISD::FP_EXTEND) {
12882       SDValue N000 = N00.getOperand(0);
12883       if (isContractableFMUL(N000) &&
12884           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12885                               N000.getValueType())) {
12886         return DAG.getNode(
12887             ISD::FNEG, SL, VT,
12888             DAG.getNode(PreferredFusedOpcode, SL, VT,
12889                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
12890                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
12891                         N1));
12892       }
12893     }
12894   }
12895 
  // More folding opportunities when the target permits.
12897   if (Aggressive) {
12898     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
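    // This holds because (x*y + u*v) - z == x*y + (u*v + (-z)): the
    // subtraction is reassociated into the inner fused multiply-add.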
12900     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
12901         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
12902         N0.getOperand(2)->hasOneUse()) {
12903       return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
12904                          N0.getOperand(1),
12905                          DAG.getNode(PreferredFusedOpcode, SL, VT,
12906                                      N0.getOperand(2).getOperand(0),
12907                                      N0.getOperand(2).getOperand(1),
12908                                      DAG.getNode(ISD::FNEG, SL, VT, N1)));
12909     }
12910 
12911     // fold (fsub x, (fma y, z, (fmul u, v)))
12912     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
12913     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
12914         isContractableFMUL(N1.getOperand(2)) &&
12915         N1->hasOneUse() && NoSignedZero) {
12916       SDValue N20 = N1.getOperand(2).getOperand(0);
12917       SDValue N21 = N1.getOperand(2).getOperand(1);
12918       return DAG.getNode(
12919           PreferredFusedOpcode, SL, VT,
12920           DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
12921           DAG.getNode(PreferredFusedOpcode, SL, VT,
12922                       DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
12923     }
12924 
12925 
12926     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
12928     if (N0.getOpcode() == PreferredFusedOpcode &&
12929         N0->hasOneUse()) {
12930       SDValue N02 = N0.getOperand(2);
12931       if (N02.getOpcode() == ISD::FP_EXTEND) {
12932         SDValue N020 = N02.getOperand(0);
12933         if (isContractableFMUL(N020) &&
12934             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12935                                 N020.getValueType())) {
12936           return DAG.getNode(
12937               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
12938               DAG.getNode(
12939                   PreferredFusedOpcode, SL, VT,
12940                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
12941                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
12942                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
12943         }
12944       }
12945     }
12946 
12947     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
12948     //   -> (fma (fpext x), (fpext y),
12949     //           (fma (fpext u), (fpext v), (fneg z)))
12950     // FIXME: This turns two single-precision and one double-precision
12951     // operation into two double-precision operations, which might not be
12952     // interesting for all targets, especially GPUs.
12953     if (N0.getOpcode() == ISD::FP_EXTEND) {
12954       SDValue N00 = N0.getOperand(0);
12955       if (N00.getOpcode() == PreferredFusedOpcode) {
12956         SDValue N002 = N00.getOperand(2);
12957         if (isContractableFMUL(N002) &&
12958             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12959                                 N00.getValueType())) {
12960           return DAG.getNode(
12961               PreferredFusedOpcode, SL, VT,
12962               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
12963               DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
12964               DAG.getNode(
12965                   PreferredFusedOpcode, SL, VT,
12966                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
12967                   DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
12968                   DAG.getNode(ISD::FNEG, SL, VT, N1)));
12969         }
12970       }
12971     }
12972 
12973     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
12974     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
12975     if (N1.getOpcode() == PreferredFusedOpcode &&
12976         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
12977         N1->hasOneUse()) {
12978       SDValue N120 = N1.getOperand(2).getOperand(0);
12979       if (isContractableFMUL(N120) &&
12980           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
12981                               N120.getValueType())) {
12982         SDValue N1200 = N120.getOperand(0);
12983         SDValue N1201 = N120.getOperand(1);
12984         return DAG.getNode(
12985             PreferredFusedOpcode, SL, VT,
12986             DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
12987             DAG.getNode(PreferredFusedOpcode, SL, VT,
12988                         DAG.getNode(ISD::FNEG, SL, VT,
12989                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
12990                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
12991       }
12992     }
12993 
12994     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
12995     //   -> (fma (fneg (fpext y)), (fpext z),
12996     //           (fma (fneg (fpext u)), (fpext v), x))
12997     // FIXME: This turns two single-precision and one double-precision
12998     // operation into two double-precision operations, which might not be
12999     // interesting for all targets, especially GPUs.
13000     if (N1.getOpcode() == ISD::FP_EXTEND &&
13001         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
13002       SDValue CvtSrc = N1.getOperand(0);
13003       SDValue N100 = CvtSrc.getOperand(0);
13004       SDValue N101 = CvtSrc.getOperand(1);
13005       SDValue N102 = CvtSrc.getOperand(2);
13006       if (isContractableFMUL(N102) &&
13007           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
13008                               CvtSrc.getValueType())) {
13009         SDValue N1020 = N102.getOperand(0);
13010         SDValue N1021 = N102.getOperand(1);
13011         return DAG.getNode(
13012             PreferredFusedOpcode, SL, VT,
13013             DAG.getNode(ISD::FNEG, SL, VT,
13014                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
13015             DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
13016             DAG.getNode(PreferredFusedOpcode, SL, VT,
13017                         DAG.getNode(ISD::FNEG, SL, VT,
13018                                     DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
13019                         DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
13020       }
13021     }
13022   }
13023 
13024   return SDValue();
13025 }
13026 
13027 /// Try to perform FMA combining on a given FMUL node based on the distributive
/// law x * (y + 1) = x * y + x and variants thereof (commuted versions, and
/// subtraction instead of addition).
13030 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
13031   SDValue N0 = N->getOperand(0);
13032   SDValue N1 = N->getOperand(1);
13033   EVT VT = N->getValueType(0);
13034   SDLoc SL(N);
13035 
13036   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
13037 
13038   const TargetOptions &Options = DAG.getTarget().Options;
13039 
13040   // The transforms below are incorrect when x == 0 and y == inf, because the
13041   // intermediate multiplication produces a nan.
13042   if (!Options.NoInfsFPMath)
13043     return SDValue();
13044 
13045   // Floating-point multiply-add without intermediate rounding.
13046   bool HasFMA =
13047       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
13048       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
13049       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
13050 
13051   // Floating-point multiply-add with intermediate rounding. This can result
13052   // in a less precise result due to the changed rounding order.
13053   bool HasFMAD = Options.UnsafeFPMath &&
13054                  (LegalOperations && TLI.isFMADLegal(DAG, N));
13055 
13056   // No valid opcode, do not combine.
13057   if (!HasFMAD && !HasFMA)
13058     return SDValue();
13059 
13060   // Always prefer FMAD to FMA for precision.
13061   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
13062   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
13063 
13064   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
13065   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
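  // These follow from (x0 + 1.0) * y == x0*y + y and (x0 - 1.0) * y ==
  // x0*y - y. For example, with x0 == 3.0 and y == 2.0:
  // (3.0 + 1.0) * 2.0 == 8.0 == fma(3.0, 2.0, 2.0).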
13066   auto FuseFADD = [&](SDValue X, SDValue Y) {
13067     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
13068       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
13069         if (C->isExactlyValue(+1.0))
13070           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13071                              Y);
13072         if (C->isExactlyValue(-1.0))
13073           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13074                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13075       }
13076     }
13077     return SDValue();
13078   };
13079 
13080   if (SDValue FMA = FuseFADD(N0, N1))
13081     return FMA;
13082   if (SDValue FMA = FuseFADD(N1, N0))
13083     return FMA;
13084 
13085   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
13086   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
13087   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
13088   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
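  // These follow from distributing y over the subtraction. For example,
  // (1.0 - x1) * y == y - x1*y == fma(-x1, y, y); with x1 == 3.0 and
  // y == 2.0, both sides are -4.0.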
13089   auto FuseFSUB = [&](SDValue X, SDValue Y) {
13090     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
13091       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
13092         if (C0->isExactlyValue(+1.0))
13093           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13094                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13095                              Y);
13096         if (C0->isExactlyValue(-1.0))
13097           return DAG.getNode(PreferredFusedOpcode, SL, VT,
13098                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
13099                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13100       }
13101       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
13102         if (C1->isExactlyValue(+1.0))
13103           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13104                              DAG.getNode(ISD::FNEG, SL, VT, Y));
13105         if (C1->isExactlyValue(-1.0))
13106           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
13107                              Y);
13108       }
13109     }
13110     return SDValue();
13111   };
13112 
13113   if (SDValue FMA = FuseFSUB(N0, N1))
13114     return FMA;
13115   if (SDValue FMA = FuseFSUB(N1, N0))
13116     return FMA;
13117 
13118   return SDValue();
13119 }
13120 
13121 SDValue DAGCombiner::visitFADD(SDNode *N) {
13122   SDValue N0 = N->getOperand(0);
13123   SDValue N1 = N->getOperand(1);
13124   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13125   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13126   EVT VT = N->getValueType(0);
13127   SDLoc DL(N);
13128   const TargetOptions &Options = DAG.getTarget().Options;
13129   SDNodeFlags Flags = N->getFlags();
13130   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13131 
13132   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13133     return R;
13134 
13135   // fold vector ops
13136   if (VT.isVector())
13137     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13138       return FoldedVOp;
13139 
13140   // fold (fadd c1, c2) -> c1 + c2
13141   if (N0CFP && N1CFP)
13142     return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
13143 
13144   // canonicalize constant to RHS
13145   if (N0CFP && !N1CFP)
13146     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
13147 
13148   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
13149   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
13150   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
13152       return N0;
13153 
13154   if (SDValue NewSel = foldBinOpIntoSelect(N))
13155     return NewSel;
13156 
13157   // fold (fadd A, (fneg B)) -> (fsub A, B)
13158   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13159     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13160             N1, DAG, LegalOperations, ForCodeSize))
13161       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
13162 
13163   // fold (fadd (fneg A), B) -> (fsub B, A)
13164   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
13165     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13166             N0, DAG, LegalOperations, ForCodeSize))
13167       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
13168 
13169   auto isFMulNegTwo = [](SDValue FMul) {
13170     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
13171       return false;
13172     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
13173     return C && C->isExactlyValue(-2.0);
13174   };
13175 
13176   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
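  // This holds because A + B*(-2.0) == A - (B + B). For example, with
  // A == 10.0 and B == 3.0, both sides are 4.0.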
13177   if (isFMulNegTwo(N0)) {
13178     SDValue B = N0.getOperand(0);
13179     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13180     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
13181   }
13182   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
13183   if (isFMulNegTwo(N1)) {
13184     SDValue B = N1.getOperand(0);
13185     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
13186     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
13187   }
13188 
  // No FP constant should be created after legalization, as the Instruction
  // Selection pass has a hard time dealing with FP constants.
13191   bool AllowNewConst = (Level < AfterLegalizeDAG);
13192 
13193   // If nnan is enabled, fold lots of things.
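  // x + (-x) is +0.0 only when x is finite; for x == NaN or x == +/-inf the
  // true result is NaN, so nnan is needed to justify folding to 0.0.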
13194   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
13195     // If allowed, fold (fadd (fneg x), x) -> 0.0
13196     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
13197       return DAG.getConstantFP(0.0, DL, VT);
13198 
13199     // If allowed, fold (fadd x, (fneg x)) -> 0.0
13200     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
13201       return DAG.getConstantFP(0.0, DL, VT);
13202   }
13203 
  // If 'unsafe math' or reassoc and nsz are enabled, fold lots of things.
  // TODO: Break out the portions of the transformations below for which
  //       Unsafe alone suffices, i.e. those that do not require both nsz
  //       and reassoc.
13207   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13208        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13209       AllowNewConst) {
13210     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
13211     if (N1CFP && N0.getOpcode() == ISD::FADD &&
13212         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13213       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
13214       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
13215     }
13216 
    // We can fold chains of FADDs of the same value into multiplications.
13218     // This transform is not safe in general because we are reducing the number
13219     // of rounding steps.
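    // For example, (x * c) + x rounds the multiply and then the add, while
    // x * (c + 1.0) rounds the constant add and then the multiply; the two
    // results can differ in the last ulp.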
13220     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
13221       if (N0.getOpcode() == ISD::FMUL) {
13222         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13223         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
13224 
13225         // (fadd (fmul x, c), x) -> (fmul x, c+1)
13226         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
13227           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13228                                        DAG.getConstantFP(1.0, DL, VT));
13229           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
13230         }
13231 
13232         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
13233         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
13234             N1.getOperand(0) == N1.getOperand(1) &&
13235             N0.getOperand(0) == N1.getOperand(0)) {
13236           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
13237                                        DAG.getConstantFP(2.0, DL, VT));
13238           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
13239         }
13240       }
13241 
13242       if (N1.getOpcode() == ISD::FMUL) {
13243         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13244         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
13245 
13246         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
13247         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
13248           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13249                                        DAG.getConstantFP(1.0, DL, VT));
13250           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
13251         }
13252 
13253         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
13254         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
13255             N0.getOperand(0) == N0.getOperand(1) &&
13256             N1.getOperand(0) == N0.getOperand(0)) {
13257           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
13258                                        DAG.getConstantFP(2.0, DL, VT));
13259           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
13260         }
13261       }
13262 
13263       if (N0.getOpcode() == ISD::FADD) {
13264         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
13265         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
13266         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
13267             (N0.getOperand(0) == N1)) {
13268           return DAG.getNode(ISD::FMUL, DL, VT, N1,
13269                              DAG.getConstantFP(3.0, DL, VT));
13270         }
13271       }
13272 
13273       if (N1.getOpcode() == ISD::FADD) {
13274         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
13275         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
13276         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
13277             N1.getOperand(0) == N0) {
13278           return DAG.getNode(ISD::FMUL, DL, VT, N0,
13279                              DAG.getConstantFP(3.0, DL, VT));
13280         }
13281       }
13282 
13283       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
13284       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
13285           N0.getOperand(0) == N0.getOperand(1) &&
13286           N1.getOperand(0) == N1.getOperand(1) &&
13287           N0.getOperand(0) == N1.getOperand(0)) {
13288         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
13289                            DAG.getConstantFP(4.0, DL, VT));
13290       }
13291     }
13292   } // enable-unsafe-fp-math
13293 
13294   // FADD -> FMA combines:
13295   if (SDValue Fused = visitFADDForFMACombine(N)) {
13296     AddToWorklist(Fused.getNode());
13297     return Fused;
13298   }
13299   return SDValue();
13300 }
13301 
13302 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
13303   SDValue Chain = N->getOperand(0);
13304   SDValue N0 = N->getOperand(1);
13305   SDValue N1 = N->getOperand(2);
13306   EVT VT = N->getValueType(0);
13307   EVT ChainVT = N->getValueType(1);
13308   SDLoc DL(N);
13309   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13310 
13311   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
13312   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13313     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
13314             N1, DAG, LegalOperations, ForCodeSize)) {
13315       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13316                          {Chain, N0, NegN1});
13317     }
13318 
13319   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
13320   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
13321     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
13322             N0, DAG, LegalOperations, ForCodeSize)) {
13323       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
13324                          {Chain, N1, NegN0});
13325     }
13326   return SDValue();
13327 }
13328 
13329 SDValue DAGCombiner::visitFSUB(SDNode *N) {
13330   SDValue N0 = N->getOperand(0);
13331   SDValue N1 = N->getOperand(1);
13332   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13333   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13334   EVT VT = N->getValueType(0);
13335   SDLoc DL(N);
13336   const TargetOptions &Options = DAG.getTarget().Options;
13337   const SDNodeFlags Flags = N->getFlags();
13338   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13339 
13340   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13341     return R;
13342 
13343   // fold vector ops
13344   if (VT.isVector())
13345     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13346       return FoldedVOp;
13347 
13348   // fold (fsub c1, c2) -> c1-c2
13349   if (N0CFP && N1CFP)
13350     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1);
13351 
13352   if (SDValue NewSel = foldBinOpIntoSelect(N))
13353     return NewSel;
13354 
13355   // (fsub A, 0) -> A
13356   if (N1CFP && N1CFP->isZero()) {
13357     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
13358         Flags.hasNoSignedZeros()) {
13359       return N0;
13360     }
13361   }
13362 
13363   if (N0 == N1) {
13364     // (fsub x, x) -> 0.0
13365     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
13366       return DAG.getConstantFP(0.0f, DL, VT);
13367   }
13368 
13369   // (fsub -0.0, N1) -> -N1
13370   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
13371   //       FSUB does not specify the sign bit of a NaN. Also note that for
13372   //       the same reason, the inverse transform is not safe, unless fast math
13373   //       flags are in play.
13374   if (N0CFP && N0CFP->isZero()) {
13375     if (N0CFP->isNegative() ||
13376         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
13377       if (SDValue NegN1 =
13378               TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13379         return NegN1;
13380       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13381         return DAG.getNode(ISD::FNEG, DL, VT, N1);
13382     }
13383   }
13384 
13385   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
13386        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
13387       N1.getOpcode() == ISD::FADD) {
13388     // X - (X + Y) -> -Y
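    // nsz is required: with X == Y == +0.0 the original computes +0.0 while
    // -Y is -0.0, and reassoc licenses regrouping to (X - X) - Y.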
13389     if (N0 == N1->getOperand(0))
13390       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
13391     // X - (Y + X) -> -Y
13392     if (N0 == N1->getOperand(1))
13393       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
13394   }
13395 
13396   // fold (fsub A, (fneg B)) -> (fadd A, B)
13397   if (SDValue NegN1 =
13398           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
13399     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
13400 
13401   // FSUB -> FMA combines:
13402   if (SDValue Fused = visitFSUBForFMACombine(N)) {
13403     AddToWorklist(Fused.getNode());
13404     return Fused;
13405   }
13406 
13407   return SDValue();
13408 }
13409 
13410 SDValue DAGCombiner::visitFMUL(SDNode *N) {
13411   SDValue N0 = N->getOperand(0);
13412   SDValue N1 = N->getOperand(1);
13413   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
13414   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
13415   EVT VT = N->getValueType(0);
13416   SDLoc DL(N);
13417   const TargetOptions &Options = DAG.getTarget().Options;
13418   const SDNodeFlags Flags = N->getFlags();
13419   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13420 
13421   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13422     return R;
13423 
13424   // fold vector ops
13425   if (VT.isVector()) {
13426     // This just handles C1 * C2 for vectors. Other vector folds are below.
13427     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13428       return FoldedVOp;
13429   }
13430 
13431   // fold (fmul c1, c2) -> c1*c2
13432   if (N0CFP && N1CFP)
13433     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
13434 
13435   // canonicalize constant to RHS
13436   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13437      !isConstantFPBuildVectorOrConstantFP(N1))
13438     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
13439 
13440   if (SDValue NewSel = foldBinOpIntoSelect(N))
13441     return NewSel;
13442 
13443   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
13444     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
13445     if (isConstantFPBuildVectorOrConstantFP(N1) &&
13446         N0.getOpcode() == ISD::FMUL) {
13447       SDValue N00 = N0.getOperand(0);
13448       SDValue N01 = N0.getOperand(1);
13449       // Avoid an infinite loop by making sure that N00 is not a constant
13450       // (the inner multiply has not been constant folded yet).
13451       if (isConstantFPBuildVectorOrConstantFP(N01) &&
13452           !isConstantFPBuildVectorOrConstantFP(N00)) {
13453         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
13454         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
13455       }
13456     }
13457 
    // Match a special case: X * 2.0 has already been canonicalized into
    // (fadd X, X), so treat (fadd X, X) as X * 2.0:
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
13460     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
13461         N0.getOperand(0) == N0.getOperand(1)) {
13462       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
13463       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
13464       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
13465     }
13466   }
13467 
13468   // fold (fmul X, 2.0) -> (fadd X, X)
13469   if (N1CFP && N1CFP->isExactlyValue(+2.0))
13470     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
13471 
13472   // fold (fmul X, -1.0) -> (fneg X)
13473   if (N1CFP && N1CFP->isExactlyValue(-1.0))
13474     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13475       return DAG.getNode(ISD::FNEG, DL, VT, N0);
13476 
13477   // -N0 * -N1 --> N0 * N1
13478   TargetLowering::NegatibleCost CostN0 =
13479       TargetLowering::NegatibleCost::Expensive;
13480   TargetLowering::NegatibleCost CostN1 =
13481       TargetLowering::NegatibleCost::Expensive;
13482   SDValue NegN0 =
13483       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13484   SDValue NegN1 =
13485       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13486   if (NegN0 && NegN1 &&
13487       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13488        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13489     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
13490 
13491   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
13492   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
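  // nsz covers X == 0.0, where the multiply and the fabs/fneg forms can
  // disagree on the sign of zero; nnan covers NaN inputs, where the
  // comparison is unordered.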
13493   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
13494       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
13495       TLI.isOperationLegal(ISD::FABS, VT)) {
13496     SDValue Select = N0, X = N1;
13497     if (Select.getOpcode() != ISD::SELECT)
13498       std::swap(Select, X);
13499 
13500     SDValue Cond = Select.getOperand(0);
13501     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
13502     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
13503 
13504     if (TrueOpnd && FalseOpnd &&
13505         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
13506         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
13507         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
13508       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
13509       switch (CC) {
13510       default: break;
13511       case ISD::SETOLT:
13512       case ISD::SETULT:
13513       case ISD::SETOLE:
13514       case ISD::SETULE:
13515       case ISD::SETLT:
13516       case ISD::SETLE:
13517         std::swap(TrueOpnd, FalseOpnd);
13518         LLVM_FALLTHROUGH;
13519       case ISD::SETOGT:
13520       case ISD::SETUGT:
13521       case ISD::SETOGE:
13522       case ISD::SETUGE:
13523       case ISD::SETGT:
13524       case ISD::SETGE:
13525         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
13526             TLI.isOperationLegal(ISD::FNEG, VT))
13527           return DAG.getNode(ISD::FNEG, DL, VT,
13528                    DAG.getNode(ISD::FABS, DL, VT, X));
13529         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
13530           return DAG.getNode(ISD::FABS, DL, VT, X);
13531 
13532         break;
13533       }
13534     }
13535   }
13536 
13537   // FMUL -> FMA combines:
13538   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
13539     AddToWorklist(Fused.getNode());
13540     return Fused;
13541   }
13542 
13543   return SDValue();
13544 }
13545 
13546 SDValue DAGCombiner::visitFMA(SDNode *N) {
13547   SDValue N0 = N->getOperand(0);
13548   SDValue N1 = N->getOperand(1);
13549   SDValue N2 = N->getOperand(2);
13550   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13551   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13552   EVT VT = N->getValueType(0);
13553   SDLoc DL(N);
13554   const TargetOptions &Options = DAG.getTarget().Options;
13555   // FMA nodes have flags that propagate to the created nodes.
13556   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13557 
13558   bool UnsafeFPMath =
13559       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
13560 
13561   // Constant fold FMA.
13562   if (isa<ConstantFPSDNode>(N0) &&
13563       isa<ConstantFPSDNode>(N1) &&
13564       isa<ConstantFPSDNode>(N2)) {
13565     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
13566   }
13567 
13568   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
13569   TargetLowering::NegatibleCost CostN0 =
13570       TargetLowering::NegatibleCost::Expensive;
13571   TargetLowering::NegatibleCost CostN1 =
13572       TargetLowering::NegatibleCost::Expensive;
13573   SDValue NegN0 =
13574       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13575   SDValue NegN1 =
13576       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13577   if (NegN0 && NegN1 &&
13578       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13579        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13580     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
13581 
13582   if (UnsafeFPMath) {
13583     if (N0CFP && N0CFP->isZero())
13584       return N2;
13585     if (N1CFP && N1CFP->isZero())
13586       return N2;
13587   }
13588 
13589   if (N0CFP && N0CFP->isExactlyValue(1.0))
13590     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
13591   if (N1CFP && N1CFP->isExactlyValue(1.0))
13592     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
13593 
13594   // Canonicalize (fma c, x, y) -> (fma x, c, y)
13595   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13596      !isConstantFPBuildVectorOrConstantFP(N1))
13597     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
13598 
13599   if (UnsafeFPMath) {
13600     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
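    // This holds because x*c1 + x*c2 == x*(c1 + c2); the reassociation is
    // why it is guarded by the unsafe-math check above.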
13601     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
13602         isConstantFPBuildVectorOrConstantFP(N1) &&
13603         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
13604       return DAG.getNode(ISD::FMUL, DL, VT, N0,
13605                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
13606     }
13607 
13608     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
13609     if (N0.getOpcode() == ISD::FMUL &&
13610         isConstantFPBuildVectorOrConstantFP(N1) &&
13611         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
13612       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13613                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
13614                          N2);
13615     }
13616   }
13617 
  // (fma x, 1, y)  -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
13619   if (N1CFP) {
13620     if (N1CFP->isExactlyValue(1.0))
13621       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
13622 
13623     if (N1CFP->isExactlyValue(-1.0) &&
13624         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
13625       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
13626       AddToWorklist(RHSNeg.getNode());
13627       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
13628     }
13629 
    // fma (fneg x), K, y -> fma x, -K, y
13631     if (N0.getOpcode() == ISD::FNEG &&
13632         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13633          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
13634                                               ForCodeSize)))) {
13635       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
13636                          DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
13637     }
13638   }
13639 
13640   if (UnsafeFPMath) {
13641     // (fma x, c, x) -> (fmul x, (c+1))
13642     if (N1CFP && N0 == N2) {
13643       return DAG.getNode(
13644           ISD::FMUL, DL, VT, N0,
13645           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
13646     }
13647 
13648     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
13649     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
13650       return DAG.getNode(
13651           ISD::FMUL, DL, VT, N0,
13652           DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
13653     }
13654   }
13655 
  // fold (fma (fneg X), Y, (fneg Z)) -> (fneg (fma X, Y, Z))
  // fold (fma X, (fneg Y), (fneg Z)) -> (fneg (fma X, Y, Z))
13658   if (!TLI.isFNegFree(VT))
13659     if (SDValue Neg = TLI.getCheaperNegatedExpression(
13660             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
13661       return DAG.getNode(ISD::FNEG, DL, VT, Neg);
13662   return SDValue();
13663 }
13664 
13665 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13666 // reciprocal.
13667 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is that different
// targets may have different costs for FDIV and FMUL, so sometimes the cost
// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
// reason is that the critical path grows from "one FDIV" to "one FDIV + one
// FMUL".
13672 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
13673   // TODO: Limit this transform based on optsize/minsize - it always creates at
13674   //       least 1 extra instruction. But the perf win may be substantial enough
13675   //       that only minsize should restrict this.
13676   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
13677   const SDNodeFlags Flags = N->getFlags();
13678   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
13679     return SDValue();
13680 
13681   // Skip if current node is a reciprocal/fneg-reciprocal.
13682   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13683   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
13684   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
13685     return SDValue();
13686 
13687   // Exit early if the target does not want this transform or if there can't
13688   // possibly be enough uses of the divisor to make the transform worthwhile.
13689   unsigned MinUses = TLI.combineRepeatedFPDivisors();
13690 
13691   // For splat vectors, scale the number of uses by the splat factor. If we can
13692   // convert the division into a scalar op, that will likely be much faster.
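  // For example, a single <4 x float> FDIV by a splat divisor counts as four
  // potential scalar divides against the MinUses threshold.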
13693   unsigned NumElts = 1;
13694   EVT VT = N->getValueType(0);
13695   if (VT.isVector() && DAG.isSplatValue(N1))
13696     NumElts = VT.getVectorNumElements();
13697 
13698   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
13699     return SDValue();
13700 
13701   // Find all FDIV users of the same divisor.
13702   // Use a set because duplicates may be present in the user list.
13703   SetVector<SDNode *> Users;
13704   for (auto *U : N1->uses()) {
13705     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
13706       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
13707       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
13708           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
13709           U->getFlags().hasAllowReassociation() &&
13710           U->getFlags().hasNoSignedZeros())
13711         continue;
13712 
13713       // This division is eligible for optimization only if global unsafe math
13714       // is enabled or if this division allows reciprocal formation.
13715       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
13716         Users.insert(U);
13717     }
13718   }
13719 
13720   // Now that we have the actual number of divisor uses, make sure it meets
13721   // the minimum threshold specified by the target.
13722   if ((Users.size() * NumElts) < MinUses)
13723     return SDValue();
13724 
13725   SDLoc DL(N);
13726   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13727   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
13728 
13729   // Dividend / Divisor -> Dividend * Reciprocal
13730   for (auto *U : Users) {
13731     SDValue Dividend = U->getOperand(0);
13732     if (Dividend != FPOne) {
13733       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
13734                                     Reciprocal, Flags);
13735       CombineTo(U, NewNode);
13736     } else if (U != Reciprocal.getNode()) {
13737       // In the absence of fast-math-flags, this user node is always the
13738       // same node as Reciprocal, but with FMF they may be different nodes.
13739       CombineTo(U, Reciprocal);
13740     }
13741   }
13742   return SDValue(N, 0);  // N was replaced.
13743 }
13744 
13745 SDValue DAGCombiner::visitFDIV(SDNode *N) {
13746   SDValue N0 = N->getOperand(0);
13747   SDValue N1 = N->getOperand(1);
13748   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13749   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13750   EVT VT = N->getValueType(0);
13751   SDLoc DL(N);
13752   const TargetOptions &Options = DAG.getTarget().Options;
13753   SDNodeFlags Flags = N->getFlags();
13754   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13755 
13756   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13757     return R;
13758 
13759   // fold vector ops
13760   if (VT.isVector())
13761     if (SDValue FoldedVOp = SimplifyVBinOp(N))
13762       return FoldedVOp;
13763 
13764   // fold (fdiv c1, c2) -> c1/c2
13765   if (N0CFP && N1CFP)
13766     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
13767 
13768   if (SDValue NewSel = foldBinOpIntoSelect(N))
13769     return NewSel;
13770 
13771   if (SDValue V = combineRepeatedFPDivisors(N))
13772     return V;
13773 
13774   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
13775     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
13776     if (N1CFP) {
13777       // Compute the reciprocal 1.0 / c2.
13778       const APFloat &N1APF = N1CFP->getValueAPF();
13779       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
13780       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
13781       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g. NaN, denormal, ...).
13783       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
13784           (!LegalOperations ||
13785            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
13786            // backend)... we should handle this gracefully after Legalize.
13787            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
13788            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
13789            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
13790         return DAG.getNode(ISD::FMUL, DL, VT, N0,
13791                            DAG.getConstantFP(Recip, DL, VT));
13792     }
13793 
13794     // If this FDIV is part of a reciprocal square root, it may be folded
13795     // into a target-specific square root estimate instruction.
13796     if (N1.getOpcode() == ISD::FSQRT) {
13797       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
13798         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
13799     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
13800                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13801       if (SDValue RV =
13802               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
13803         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
13804         AddToWorklist(RV.getNode());
13805         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
13806       }
13807     } else if (N1.getOpcode() == ISD::FP_ROUND &&
13808                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13809       if (SDValue RV =
13810               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
13811         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
13812         AddToWorklist(RV.getNode());
13813         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
13814       }
13815     } else if (N1.getOpcode() == ISD::FMUL) {
13816       // Look through an FMUL. Even though this won't remove the FDIV directly,
13817       // it's still worthwhile to get rid of the FSQRT if possible.
13818       SDValue Sqrt, Y;
13819       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
13820         Sqrt = N1.getOperand(0);
13821         Y = N1.getOperand(1);
13822       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
13823         Sqrt = N1.getOperand(1);
13824         Y = N1.getOperand(0);
13825       }
13826       if (Sqrt.getNode()) {
13827         // If the other multiply operand is known positive, pull it into the
13828         // sqrt. That will eliminate the division if we convert to an estimate.
13829         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
13830             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
13831           SDValue A;
13832           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
13833             A = Y.getOperand(0);
13834           else if (Y == Sqrt.getOperand(0))
13835             A = Y;
13836           if (A) {
13837             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
13838             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
13839             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
13840             SDValue AAZ =
13841                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
13842             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
13843               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
13844 
13845             // Estimate creation failed. Clean up speculatively created nodes.
13846             recursivelyDeleteUnusedNodes(AAZ.getNode());
13847           }
13848         }
13849 
13850         // We found a FSQRT, so try to make this fold:
13851         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
13852         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
13853           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
13854           AddToWorklist(Div.getNode());
13855           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
13856         }
13857       }
13858     }
13859 
13860     // Fold into a reciprocal estimate and multiply instead of a real divide.
13861     if (Options.NoInfsFPMath || Flags.hasNoInfs())
13862       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
13863         return RV;
13864   }
13865 
13866   // Fold X/Sqrt(X) -> Sqrt(X)
13867   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
13868       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
13869     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
13870       return N1;
13871 
13872   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
13873   TargetLowering::NegatibleCost CostN0 =
13874       TargetLowering::NegatibleCost::Expensive;
13875   TargetLowering::NegatibleCost CostN1 =
13876       TargetLowering::NegatibleCost::Expensive;
13877   SDValue NegN0 =
13878       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
13879   SDValue NegN1 =
13880       TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
13881   if (NegN0 && NegN1 &&
13882       (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
13883        CostN1 == TargetLowering::NegatibleCost::Cheaper))
13884     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
13885 
13886   return SDValue();
13887 }
13888 
13889 SDValue DAGCombiner::visitFREM(SDNode *N) {
13890   SDValue N0 = N->getOperand(0);
13891   SDValue N1 = N->getOperand(1);
13892   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13893   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
13894   EVT VT = N->getValueType(0);
13895   SDNodeFlags Flags = N->getFlags();
13896   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
13897 
13898   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
13899     return R;
13900 
13901   // fold (frem c1, c2) -> fmod(c1,c2)
13902   if (N0CFP && N1CFP)
13903     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
13904 
13905   if (SDValue NewSel = foldBinOpIntoSelect(N))
13906     return NewSel;
13907 
13908   return SDValue();
13909 }
13910 
13911 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
13912   SDNodeFlags Flags = N->getFlags();
13913   const TargetOptions &Options = DAG.getTarget().Options;
13914 
  // Require the 'ninf' flag since sqrt(+Inf) = +Inf, but the estimate
  // computes it as:
  // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
13917   if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
13918       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
13919     return SDValue();
13920 
13921   SDValue N0 = N->getOperand(0);
13922   if (TLI.isFsqrtCheap(N0, DAG))
13923     return SDValue();
13924 
13925   // FSQRT nodes have flags that propagate to the created nodes.
13926   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
13927   //       transform the fdiv, we may produce a sub-optimal estimate sequence
13928   //       because the reciprocal calculation may not have to filter out a
13929   //       0.0 input.
13930   return buildSqrtEstimate(N0, Flags);
13931 }
13932 
13933 /// copysign(x, fp_extend(y)) -> copysign(x, y)
13934 /// copysign(x, fp_round(y)) -> copysign(x, y)
13935 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
13936   SDValue N1 = N->getOperand(1);
13937   if ((N1.getOpcode() == ISD::FP_EXTEND ||
13938        N1.getOpcode() == ISD::FP_ROUND)) {
    // Do not optimize out the type conversion of f128 values yet.
    // For some targets like x86_64, the configuration keeps one f128 value in
    // one SSE register, but instruction selection cannot handle FCOPYSIGN on
    // SSE registers yet.
13943     EVT N1VT = N1->getValueType(0);
13944     EVT N1Op0VT = N1->getOperand(0).getValueType();
13945     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
13946   }
13947   return false;
13948 }
13949 
13950 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
13951   SDValue N0 = N->getOperand(0);
13952   SDValue N1 = N->getOperand(1);
13953   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
13954   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
13955   EVT VT = N->getValueType(0);
13956 
13957   if (N0CFP && N1CFP) // Constant fold
13958     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
13959 
13960   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
13961     const APFloat &V = N1C->getValueAPF();
13962     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
13963     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
13964     if (!V.isNegative()) {
13965       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
13966         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13967     } else {
13968       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
13969         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
13970                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
13971     }
13972   }
13973 
13974   // copysign(fabs(x), y) -> copysign(x, y)
13975   // copysign(fneg(x), y) -> copysign(x, y)
13976   // copysign(copysign(x,z), y) -> copysign(x, y)
13977   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
13978       N0.getOpcode() == ISD::FCOPYSIGN)
13979     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
13980 
13981   // copysign(x, abs(y)) -> abs(x)
13982   if (N1.getOpcode() == ISD::FABS)
13983     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13984 
13985   // copysign(x, copysign(y,z)) -> copysign(x, z)
13986   if (N1.getOpcode() == ISD::FCOPYSIGN)
13987     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
13988 
13989   // copysign(x, fp_extend(y)) -> copysign(x, y)
13990   // copysign(x, fp_round(y)) -> copysign(x, y)
13991   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
13992     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
13993 
13994   return SDValue();
13995 }
13996 
13997 SDValue DAGCombiner::visitFPOW(SDNode *N) {
13998   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
13999   if (!ExponentC)
14000     return SDValue();
14001   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14002 
14003   // Try to convert x ** (1/3) into cube root.
14004   // TODO: Handle the various flavors of long double.
14005   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
14006   //       Some range near 1/3 should be fine.
14007   EVT VT = N->getValueType(0);
14008   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
14009       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
14010     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
14011     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val), a negative number.
14013     // For regular numbers, rounding may cause the results to differ.
14014     // Therefore, we require { nsz ninf nnan afn } for this transform.
14015     // TODO: We could select out the special cases if we don't have nsz/ninf.
14016     SDNodeFlags Flags = N->getFlags();
14017     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
14018         !Flags.hasApproximateFuncs())
14019       return SDValue();
14020 
14021     // Do not create a cbrt() libcall if the target does not have it, and do not
14022     // turn a pow that has lowering support into a cbrt() libcall.
14023     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
14024         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
14025          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
14026       return SDValue();
14027 
14028     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
14029   }
14030 
14031   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
14032   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
14033   // TODO: This could be extended (using a target hook) to handle smaller
14034   // power-of-2 fractional exponents.
14035   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
14036   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
14037   if (ExponentIs025 || ExponentIs075) {
14038     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
14039     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
14040     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
14041     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
14042     // For regular numbers, rounding may cause the results to differ.
14043     // Therefore, we require { nsz ninf afn } for this transform.
14044     // TODO: We could select out the special cases if we don't have nsz/ninf.
14045     SDNodeFlags Flags = N->getFlags();
14046 
14047     // We only need no signed zeros for the 0.25 case.
14048     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
14049         !Flags.hasApproximateFuncs())
14050       return SDValue();
14051 
14052     // Don't double the number of libcalls. We are trying to inline fast code.
14053     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
14054       return SDValue();
14055 
14056     // Assume that libcalls are the smallest code.
14057     // TODO: This restriction should probably be lifted for vectors.
14058     if (ForCodeSize)
14059       return SDValue();
14060 
14061     // pow(X, 0.25) --> sqrt(sqrt(X))
14062     SDLoc DL(N);
14063     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
14064     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
14065     if (ExponentIs025)
14066       return SqrtSqrt;
14067     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
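    // (X ** 0.75 == X ** 0.5 * X ** 0.25, and X ** 0.25 == (X ** 0.5) ** 0.5.)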
14068     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
14069   }
14070 
14071   return SDValue();
14072 }
14073 
14074 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
14075                                const TargetLowering &TLI) {
  // This optimization is guarded by a function attribute because it may
  // produce unexpected results. That is, programs may be relying on the
  // platform-specific undefined behavior when the float-to-int conversion
  // overflows.
14079   const Function &F = DAG.getMachineFunction().getFunction();
14080   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
14081   if (StrictOverflow.getValueAsString().equals("false"))
14082     return SDValue();
14083 
14084   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
14085   // replacing casts with a libcall. We also must be allowed to ignore -0.0
14086   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
14087   // conversions would return +0.0.
14088   // FIXME: We should be able to use node-level FMF here.
14089   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
14090   EVT VT = N->getValueType(0);
14091   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
14092       !DAG.getTarget().Options.NoSignedZerosFPMath)
14093     return SDValue();
14094 
14095   // fptosi/fptoui round towards zero, so converting from FP to integer and
14096   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
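  // For example, 3.7 -> 3 -> 3.0 and -3.7 -> -3 -> -3.0, matching ftrunc
  // (assuming no overflow; -0.5 -> 0 -> +0.0 is why the no-signed-zeros
  // check above is required).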
14097   SDValue N0 = N->getOperand(0);
14098   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
14099       N0.getOperand(0).getValueType() == VT)
14100     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14101 
14102   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
14103       N0.getOperand(0).getValueType() == VT)
14104     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
14105 
14106   return SDValue();
14107 }
14108 
14109 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
14110   SDValue N0 = N->getOperand(0);
14111   EVT VT = N->getValueType(0);
14112   EVT OpVT = N0.getValueType();
14113 
14114   // [us]itofp(undef) = 0, because the result value is bounded.
14115   if (N0.isUndef())
14116     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14117 
14118   // fold (sint_to_fp c1) -> c1fp
14119   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14120       // ...but only if the target supports immediate floating-point values
14121       (!LegalOperations ||
14122        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14123     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14124 
14125   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
14126   // but UINT_TO_FP is legal on this target, try to convert.
14127   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
14128       hasOperation(ISD::UINT_TO_FP, OpVT)) {
14129     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
14130     if (DAG.SignBitIsZero(N0))
14131       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14132   }
14133 
14134   // The next optimizations are desirable only if SELECT_CC can be lowered.
14135   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
14136   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
14137       !VT.isVector() &&
14138       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14139     SDLoc DL(N);
14140     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
14141                          DAG.getConstantFP(0.0, DL, VT));
14142   }
14143 
14144   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
14145   //      (select (setcc x, y, cc), 1.0, 0.0)
14146   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
14147       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
14148       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14149     SDLoc DL(N);
14150     return DAG.getSelect(DL, VT, N0.getOperand(0),
14151                          DAG.getConstantFP(1.0, DL, VT),
14152                          DAG.getConstantFP(0.0, DL, VT));
14153   }
14154 
14155   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14156     return FTrunc;
14157 
14158   return SDValue();
14159 }
14160 
14161 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
14162   SDValue N0 = N->getOperand(0);
14163   EVT VT = N->getValueType(0);
14164   EVT OpVT = N0.getValueType();
14165 
14166   // [us]itofp(undef) = 0, because the result value is bounded.
14167   if (N0.isUndef())
14168     return DAG.getConstantFP(0.0, SDLoc(N), VT);
14169 
14170   // fold (uint_to_fp c1) -> c1fp
14171   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
14172       // ...but only if the target supports immediate floating-point values
14173       (!LegalOperations ||
14174        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
14175     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
14176 
14177   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
14178   // but SINT_TO_FP is legal on this target, try to convert.
14179   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
14180       hasOperation(ISD::SINT_TO_FP, OpVT)) {
14181     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
14182     if (DAG.SignBitIsZero(N0))
14183       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
14184   }
14185 
14186   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
14187   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
14188       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
14189     SDLoc DL(N);
14190     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
14191                          DAG.getConstantFP(0.0, DL, VT));
14192   }
14193 
14194   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
14195     return FTrunc;
14196 
14197   return SDValue();
14198 }
14199 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
14201 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
14202   SDValue N0 = N->getOperand(0);
14203   EVT VT = N->getValueType(0);
14204 
14205   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
14206     return SDValue();
14207 
14208   SDValue Src = N0.getOperand(0);
14209   EVT SrcVT = Src.getValueType();
14210   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
14211   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
14212 
14213   // We can safely assume the conversion won't overflow the output range,
14214   // because (for example) (uint8_t)18293.f is undefined behavior.
14215 
14216   // Since we can assume the conversion won't overflow, our decision as to
14217   // whether the input will fit in the float should depend on the minimum
14218   // of the input range and output range.
14219 
  // This means the fold is also safe for a signed input and unsigned output,
  // since a negative input would lead to undefined behavior.
14222   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
14223   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
14224   unsigned ActualSize = std::min(InputSize, OutputSize);
14225   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
14226 
14227   // We can only fold away the float conversion if the input range can be
14228   // represented exactly in the float range.
14229   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
14230     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
14231       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
14232                                                        : ISD::ZERO_EXTEND;
14233       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
14234     }
14235     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
14236       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
14237     return DAG.getBitcast(VT, Src);
14238   }
14239   return SDValue();
14240 }
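
// Worked example for the precision check above (illustrative only):
// (fp_to_sint (sint_to_fp i32 X)) via f32 has InputSize = OutputSize = 31,
// but f32 carries only 24 bits of precision, so no fold happens. The same
// round-trip on i16 has ActualSize = 15 <= 24, so every in-range value
// survives exactly and the pair folds to X itself (via a bitcast).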
14241 
14242 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
14243   SDValue N0 = N->getOperand(0);
14244   EVT VT = N->getValueType(0);
14245 
14246   // fold (fp_to_sint undef) -> undef
14247   if (N0.isUndef())
14248     return DAG.getUNDEF(VT);
14249 
14250   // fold (fp_to_sint c1fp) -> c1
14251   if (isConstantFPBuildVectorOrConstantFP(N0))
14252     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
14253 
14254   return FoldIntToFPToInt(N, DAG);
14255 }
14256 
14257 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
14258   SDValue N0 = N->getOperand(0);
14259   EVT VT = N->getValueType(0);
14260 
14261   // fold (fp_to_uint undef) -> undef
14262   if (N0.isUndef())
14263     return DAG.getUNDEF(VT);
14264 
14265   // fold (fp_to_uint c1fp) -> c1
14266   if (isConstantFPBuildVectorOrConstantFP(N0))
14267     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
14268 
14269   return FoldIntToFPToInt(N, DAG);
14270 }
14271 
14272 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
14273   SDValue N0 = N->getOperand(0);
14274   SDValue N1 = N->getOperand(1);
14275   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
14276   EVT VT = N->getValueType(0);
14277 
14278   // fold (fp_round c1fp) -> c1fp
14279   if (N0CFP)
14280     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
14281 
14282   // fold (fp_round (fp_extend x)) -> x
14283   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
14284     return N0.getOperand(0);
14285 
14286   // fold (fp_round (fp_round x)) -> (fp_round x)
14287   if (N0.getOpcode() == ISD::FP_ROUND) {
14288     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
14289     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
14290 
14291     // Skip this folding if it results in an fp_round from f80 to f16.
14292     //
14293     // f80 to f16 always generates an expensive (and as yet, unimplemented)
14294     // libcall to __truncxfhf2 instead of selecting native f16 conversion
14295     // instructions from f32 or f64.  Moreover, the first (value-preserving)
14296     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
14297     // x86.
14298     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
14299       return SDValue();
14300 
    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round that wouldn't occur in the
    // single-step fp_round we want to fold to.
14304     // In other words, double rounding isn't the same as rounding.
14305     // Also, this is a value preserving truncation iff both fp_round's are.
14306     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
14307       SDLoc DL(N);
14308       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
14309                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
14310     }
14311   }
14312 
14313   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
14314   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
14315     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
14316                               N0.getOperand(0), N1);
14317     AddToWorklist(Tmp.getNode());
14318     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
14319                        Tmp, N0.getOperand(1));
14320   }
14321 
14322   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14323     return NewVSel;
14324 
14325   return SDValue();
14326 }
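
// Illustrative double-rounding hazard for the fp_round(fp_round) fold above:
// a non-truncating f64 -> f32 round can land exactly on a tie between two
// adjacent f16 values, so the subsequent f32 -> f16 round may produce a
// different result than a single f64 -> f16 round of the original value.
// Hence the fold requires the inner fp_round to be value-preserving (or
// UnsafeFPMath).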
14327 
14328 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
14329   SDValue N0 = N->getOperand(0);
14330   EVT VT = N->getValueType(0);
14331 
  // If this is fp_round(fp_extend), don't fold it; allow ourselves to be
  // folded.
14333   if (N->hasOneUse() &&
14334       N->use_begin()->getOpcode() == ISD::FP_ROUND)
14335     return SDValue();
14336 
14337   // fold (fp_extend c1fp) -> c1fp
14338   if (isConstantFPBuildVectorOrConstantFP(N0))
14339     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
14340 
14341   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
14342   if (N0.getOpcode() == ISD::FP16_TO_FP &&
14343       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
14344     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
14345 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
14348   if (N0.getOpcode() == ISD::FP_ROUND
14349       && N0.getConstantOperandVal(1) == 1) {
14350     SDValue In = N0.getOperand(0);
14351     if (In.getValueType() == VT) return In;
14352     if (VT.bitsLT(In.getValueType()))
14353       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
14354                          In, N0.getOperand(1));
14355     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
14356   }
14357 
14358   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
14359   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14360        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
14361     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14362     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
14363                                      LN0->getChain(),
14364                                      LN0->getBasePtr(), N0.getValueType(),
14365                                      LN0->getMemOperand());
14366     CombineTo(N, ExtLoad);
14367     CombineTo(N0.getNode(),
14368               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
14369                           N0.getValueType(), ExtLoad,
14370                           DAG.getIntPtrConstant(1, SDLoc(N0))),
14371               ExtLoad.getValue(1));
14372     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14373   }
14374 
14375   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14376     return NewVSel;
14377 
14378   return SDValue();
14379 }
14380 
14381 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
14382   SDValue N0 = N->getOperand(0);
14383   EVT VT = N->getValueType(0);
14384 
14385   // fold (fceil c1) -> fceil(c1)
14386   if (isConstantFPBuildVectorOrConstantFP(N0))
14387     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
14388 
14389   return SDValue();
14390 }
14391 
14392 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
14393   SDValue N0 = N->getOperand(0);
14394   EVT VT = N->getValueType(0);
14395 
14396   // fold (ftrunc c1) -> ftrunc(c1)
14397   if (isConstantFPBuildVectorOrConstantFP(N0))
14398     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
14399 
  // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
  // likely to be generated when extracting an integer from a rounded
  // floating-point value.
14403   switch (N0.getOpcode()) {
14404   default: break;
14405   case ISD::FRINT:
14406   case ISD::FTRUNC:
14407   case ISD::FNEARBYINT:
14408   case ISD::FFLOOR:
14409   case ISD::FCEIL:
14410     return N0;
14411   }
14412 
14413   return SDValue();
14414 }
14415 
14416 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
14417   SDValue N0 = N->getOperand(0);
14418   EVT VT = N->getValueType(0);
14419 
14420   // fold (ffloor c1) -> ffloor(c1)
14421   if (isConstantFPBuildVectorOrConstantFP(N0))
14422     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
14423 
14424   return SDValue();
14425 }
14426 
14427 SDValue DAGCombiner::visitFNEG(SDNode *N) {
14428   SDValue N0 = N->getOperand(0);
14429   EVT VT = N->getValueType(0);
14430   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14431 
14432   // Constant fold FNEG.
14433   if (isConstantFPBuildVectorOrConstantFP(N0))
14434     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
14435 
14436   if (SDValue NegN0 =
14437           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
14438     return NegN0;
14439 
14440   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
14441   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
14442   // know it was called from a context with a nsz flag if the input fsub does
14443   // not.
14444   if (N0.getOpcode() == ISD::FSUB &&
14445       (DAG.getTarget().Options.NoSignedZerosFPMath ||
14446        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
14447     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
14448                        N0.getOperand(0));
14449   }
14450 
14451   if (SDValue Cast = foldSignChangeInBitcast(N))
14452     return Cast;
14453 
14454   return SDValue();
14455 }
14456 
14457 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
14458                             APFloat (*Op)(const APFloat &, const APFloat &)) {
14459   SDValue N0 = N->getOperand(0);
14460   SDValue N1 = N->getOperand(1);
14461   EVT VT = N->getValueType(0);
14462   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
14463   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
14464   const SDNodeFlags Flags = N->getFlags();
14465   unsigned Opc = N->getOpcode();
14466   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
14467   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
14468   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
14469 
14470   if (N0CFP && N1CFP) {
14471     const APFloat &C0 = N0CFP->getValueAPF();
14472     const APFloat &C1 = N1CFP->getValueAPF();
14473     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
14474   }
14475 
14476   // Canonicalize to constant on RHS.
14477   if (isConstantFPBuildVectorOrConstantFP(N0) &&
14478       !isConstantFPBuildVectorOrConstantFP(N1))
14479     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
14480 
14481   if (N1CFP) {
14482     const APFloat &AF = N1CFP->getValueAPF();
14483 
14484     // minnum(X, nan) -> X
14485     // maxnum(X, nan) -> X
14486     // minimum(X, nan) -> nan
14487     // maximum(X, nan) -> nan
14488     if (AF.isNaN())
14489       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
14490 
14491     // In the following folds, inf can be replaced with the largest finite
14492     // float, if the ninf flag is set.
14493     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
14494       // minnum(X, -inf) -> -inf
14495       // maxnum(X, +inf) -> +inf
14496       // minimum(X, -inf) -> -inf if nnan
14497       // maximum(X, +inf) -> +inf if nnan
14498       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
14499         return N->getOperand(1);
14500 
14501       // minnum(X, +inf) -> X if nnan
14502       // maxnum(X, -inf) -> X if nnan
14503       // minimum(X, +inf) -> X
14504       // maximum(X, -inf) -> X
14505       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
14506         return N->getOperand(0);
14507     }
14508   }
14509 
14510   return SDValue();
14511 }
14512 
14513 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
14514   return visitFMinMax(DAG, N, minnum);
14515 }
14516 
14517 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
14518   return visitFMinMax(DAG, N, maxnum);
14519 }
14520 
14521 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
14522   return visitFMinMax(DAG, N, minimum);
14523 }
14524 
14525 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
14526   return visitFMinMax(DAG, N, maximum);
14527 }
14528 
14529 SDValue DAGCombiner::visitFABS(SDNode *N) {
14530   SDValue N0 = N->getOperand(0);
14531   EVT VT = N->getValueType(0);
14532 
14533   // fold (fabs c1) -> fabs(c1)
14534   if (isConstantFPBuildVectorOrConstantFP(N0))
14535     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
14536 
14537   // fold (fabs (fabs x)) -> (fabs x)
14538   if (N0.getOpcode() == ISD::FABS)
14539     return N->getOperand(0);
14540 
14541   // fold (fabs (fneg x)) -> (fabs x)
14542   // fold (fabs (fcopysign x, y)) -> (fabs x)
14543   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
14544     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
14545 
14546   if (SDValue Cast = foldSignChangeInBitcast(N))
14547     return Cast;
14548 
14549   return SDValue();
14550 }
14551 
14552 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
14553   SDValue Chain = N->getOperand(0);
14554   SDValue N1 = N->getOperand(1);
14555   SDValue N2 = N->getOperand(2);
14556 
  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However, that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.
14562 
14563   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
14564   // on the target.
14565   if (N1.getOpcode() == ISD::SETCC &&
14566       TLI.isOperationLegalOrCustom(ISD::BR_CC,
14567                                    N1.getOperand(0).getValueType())) {
14568     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14569                        Chain, N1.getOperand(2),
14570                        N1.getOperand(0), N1.getOperand(1), N2);
14571   }
14572 
14573   if (N1.hasOneUse()) {
14574     // rebuildSetCC calls visitXor which may change the Chain when there is a
14575     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
14576     HandleSDNode ChainHandle(Chain);
14577     if (SDValue NewN1 = rebuildSetCC(N1))
14578       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
14579                          ChainHandle.getValue(), NewN1, N2);
14580   }
14581 
14582   return SDValue();
14583 }
14584 
14585 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
14586   if (N.getOpcode() == ISD::SRL ||
14587       (N.getOpcode() == ISD::TRUNCATE &&
14588        (N.getOperand(0).hasOneUse() &&
14589         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
14591     if (N.getOpcode() == ISD::TRUNCATE)
14592       N = N.getOperand(0);
14593 
14594     // Match this pattern so that we can generate simpler code:
14595     //
14596     //   %a = ...
14597     //   %b = and i32 %a, 2
14598     //   %c = srl i32 %b, 1
14599     //   brcond i32 %c ...
14600     //
14601     // into
14602     //
14603     //   %a = ...
14604     //   %b = and i32 %a, 2
14605     //   %c = setcc eq %b, 0
14606     //   brcond %c ...
14607     //
14608     // This applies only when the AND constant value has one bit set and the
14609     // SRL constant is equal to the log2 of the AND constant. The back-end is
14610     // smart enough to convert the result into a TEST/JMP sequence.
14611     SDValue Op0 = N.getOperand(0);
14612     SDValue Op1 = N.getOperand(1);
14613 
14614     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
14615       SDValue AndOp1 = Op0.getOperand(1);
14616 
14617       if (AndOp1.getOpcode() == ISD::Constant) {
14618         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
14619 
14620         if (AndConst.isPowerOf2() &&
14621             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
14622           SDLoc DL(N);
14623           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
14624                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
14625                               ISD::SETNE);
14626         }
14627       }
14628     }
14629   }
14630 
14631   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
14632   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
14633   if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC Node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
14639     HandleSDNode XORHandle(N);
14640     while (N.getOpcode() == ISD::XOR) {
14641       SDValue Tmp = visitXOR(N.getNode());
14642       // No simplification done.
14643       if (!Tmp.getNode())
14644         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
14647       if (Tmp.getNode() == N.getNode())
14648         N = XORHandle.getValue();
14649       else // Node simplified. Try simplifying again.
14650         N = Tmp;
14651     }
14652 
14653     if (N.getOpcode() != ISD::XOR)
14654       return N;
14655 
14656     SDValue Op0 = N->getOperand(0);
14657     SDValue Op1 = N->getOperand(1);
14658 
14659     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
14660       bool Equal = false;
14661       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
14662       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
14663           Op0.getValueType() == MVT::i1) {
14664         N = Op0;
14665         Op0 = N->getOperand(0);
14666         Op1 = N->getOperand(1);
14667         Equal = true;
14668       }
14669 
14670       EVT SetCCVT = N.getValueType();
14671       if (LegalTypes)
14672         SetCCVT = getSetCCResultType(SetCCVT);
14673       // Replace the uses of XOR with SETCC
14674       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
14675                           Equal ? ISD::SETEQ : ISD::SETNE);
14676     }
14677   }
14678 
14679   return SDValue();
14680 }
14681 
14682 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
14683 //
14684 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
14685   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
14686   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
14687 
  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However, that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.
14693 
14694   // Use SimplifySetCC to simplify SETCC's.
14695   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
14696                                CondLHS, CondRHS, CC->get(), SDLoc(N),
14697                                false);
14698   if (Simp.getNode()) AddToWorklist(Simp.getNode());
14699 
14700   // fold to a simpler setcc
14701   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
14702     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
14703                        N->getOperand(0), Simp.getOperand(2),
14704                        Simp.getOperand(0), Simp.getOperand(1),
14705                        N->getOperand(4));
14706 
14707   return SDValue();
14708 }
14709 
14710 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
14711                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
14712                                      const TargetLowering &TLI) {
14713   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
14714     if (LD->isIndexed())
14715       return false;
14716     EVT VT = LD->getMemoryVT();
14717     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
14718       return false;
14719     Ptr = LD->getBasePtr();
14720   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
14721     if (ST->isIndexed())
14722       return false;
14723     EVT VT = ST->getMemoryVT();
14724     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
14725       return false;
14726     Ptr = ST->getBasePtr();
14727     IsLoad = false;
14728   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
14729     if (LD->isIndexed())
14730       return false;
14731     EVT VT = LD->getMemoryVT();
14732     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
14733         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
14734       return false;
14735     Ptr = LD->getBasePtr();
14736     IsMasked = true;
14737   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
14738     if (ST->isIndexed())
14739       return false;
14740     EVT VT = ST->getMemoryVT();
14741     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
14742         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
14743       return false;
14744     Ptr = ST->getBasePtr();
14745     IsLoad = false;
14746     IsMasked = true;
14747   } else {
14748     return false;
14749   }
14750   return true;
14751 }
14752 
14753 /// Try turning a load/store into a pre-indexed load/store when the base
14754 /// pointer is an add or subtract and it has other uses besides the load/store.
14755 /// After the transformation, the new indexed load/store has effectively folded
14756 /// the add/subtract in and all of its other uses are redirected to the
14757 /// new load/store.
14758 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
14759   if (Level < AfterLegalizeDAG)
14760     return false;
14761 
14762   bool IsLoad = true;
14763   bool IsMasked = false;
14764   SDValue Ptr;
14765   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
14766                                 Ptr, TLI))
14767     return false;
14768 
14769   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
14770   // out.  There is no reason to make this a preinc/predec.
14771   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
14772       Ptr.getNode()->hasOneUse())
14773     return false;
14774 
14775   // Ask the target to do addressing mode selection.
14776   SDValue BasePtr;
14777   SDValue Offset;
14778   ISD::MemIndexedMode AM = ISD::UNINDEXED;
14779   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
14780     return false;
14781 
14782   // Backends without true r+i pre-indexed forms may need to pass a
14783   // constant base with a variable offset so that constant coercion
14784   // will work with the patterns in canonical form.
14785   bool Swapped = false;
14786   if (isa<ConstantSDNode>(BasePtr)) {
14787     std::swap(BasePtr, Offset);
14788     Swapped = true;
14789   }
14790 
  // Don't create an indexed load / store with zero offset.
14792   if (isNullConstant(Offset))
14793     return false;
14794 
14795   // Try turning it into a pre-indexed load / store except when:
14796   // 1) The new base ptr is a frame index.
14797   // 2) If N is a store and the new base ptr is either the same as or is a
14798   //    predecessor of the value being stored.
14799   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
14800   //    that would create a cycle.
14801   // 4) All uses are load / store ops that use it as old base ptr.
14802 
14803   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
14804   // (plus the implicit offset) to a register to preinc anyway.
14805   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14806     return false;
14807 
14808   // Check #2.
14809   if (!IsLoad) {
14810     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
14811                            : cast<StoreSDNode>(N)->getValue();
14812 
14813     // Would require a copy.
14814     if (Val == BasePtr)
14815       return false;
14816 
14817     // Would create a cycle.
14818     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
14819       return false;
14820   }
14821 
14822   // Caches for hasPredecessorHelper.
14823   SmallPtrSet<const SDNode *, 32> Visited;
14824   SmallVector<const SDNode *, 16> Worklist;
14825   Worklist.push_back(N);
14826 
14827   // If the offset is a constant, there may be other adds of constants that
14828   // can be folded with this one. We should do this to avoid having to keep
14829   // a copy of the original base pointer.
14830   SmallVector<SDNode *, 16> OtherUses;
14831   if (isa<ConstantSDNode>(Offset))
14832     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
14833                               UE = BasePtr.getNode()->use_end();
14834          UI != UE; ++UI) {
14835       SDUse &Use = UI.getUse();
14836       // Skip the use that is Ptr and uses of other results from BasePtr's
14837       // node (important for nodes that return multiple results).
14838       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
14839         continue;
14840 
14841       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
14842         continue;
14843 
14844       if (Use.getUser()->getOpcode() != ISD::ADD &&
14845           Use.getUser()->getOpcode() != ISD::SUB) {
14846         OtherUses.clear();
14847         break;
14848       }
14849 
14850       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
14851       if (!isa<ConstantSDNode>(Op1)) {
14852         OtherUses.clear();
14853         break;
14854       }
14855 
14856       // FIXME: In some cases, we can be smarter about this.
14857       if (Op1.getValueType() != Offset.getValueType()) {
14858         OtherUses.clear();
14859         break;
14860       }
14861 
14862       OtherUses.push_back(Use.getUser());
14863     }
14864 
14865   if (Swapped)
14866     std::swap(BasePtr, Offset);
14867 
14868   // Now check for #3 and #4.
14869   bool RealUse = false;
14870 
14871   for (SDNode *Use : Ptr.getNode()->uses()) {
14872     if (Use == N)
14873       continue;
14874     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
14875       return false;
14876 
    // If Ptr may be folded into the addressing mode of another use, then it's
    // not profitable to do this transformation.
14879     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
14880       RealUse = true;
14881   }
14882 
14883   if (!RealUse)
14884     return false;
14885 
14886   SDValue Result;
14887   if (!IsMasked) {
14888     if (IsLoad)
14889       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14890     else
14891       Result =
14892           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
14893   } else {
14894     if (IsLoad)
14895       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
14896                                         Offset, AM);
14897     else
14898       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
14899                                          Offset, AM);
14900   }
14901   ++PreIndexedNodes;
14902   ++NodesCombined;
14903   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
14904              Result.getNode()->dump(&DAG); dbgs() << '\n');
14905   WorklistRemover DeadNodes(*this);
14906   if (IsLoad) {
14907     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
14908     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
14909   } else {
14910     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
14911   }
14912 
14913   // Finally, since the node is now dead, remove it from the graph.
14914   deleteAndRecombine(N);
14915 
14916   if (Swapped)
14917     std::swap(BasePtr, Offset);
14918 
  // Replace other uses of BasePtr that can be updated to use Ptr.
14920   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
14921     unsigned OffsetIdx = 1;
14922     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
14923       OffsetIdx = 0;
14924     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
14925            BasePtr.getNode() && "Expected BasePtr operand");
14926 
14927     // We need to replace ptr0 in the following expression:
14928     //   x0 * offset0 + y0 * ptr0 = t0
14929     // knowing that
14930     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
14931     //
14932     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
14933     // indexed load/store and the expression that needs to be re-written.
14934     //
14935     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
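    //
    // As an illustrative instance: if the indexed access is a pre-decrement
    // (t1 = ptr0 - 4, so x1 = -1, y1 = 1, offset1 = 4) and the other use is
    // t0 = ptr0 + 16 (so x0 = 1, y0 = 1), then CNV = 16 + 4 = 20 and t0 is
    // rebuilt as t0 = 20 + t1.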
14937 
14938     ConstantSDNode *CN =
14939       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
14940     int X0, X1, Y0, Y1;
14941     const APInt &Offset0 = CN->getAPIntValue();
14942     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
14943 
14944     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
14945     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
14946     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
14947     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
14948 
14949     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
14950 
14951     APInt CNV = Offset0;
14952     if (X0 < 0) CNV = -CNV;
14953     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
14954     else CNV = CNV - Offset1;
14955 
14956     SDLoc DL(OtherUses[i]);
14957 
14958     // We can now generate the new expression.
14959     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
14960     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
14961 
14962     SDValue NewUse = DAG.getNode(Opcode,
14963                                  DL,
14964                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
14965     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
14966     deleteAndRecombine(OtherUses[i]);
14967   }
14968 
14969   // Replace the uses of Ptr with uses of the updated base value.
14970   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
14971   deleteAndRecombine(Ptr.getNode());
14972   AddToWorklist(Result.getNode());
14973 
14974   return true;
14975 }
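
// Illustrative shape of the pre-indexed transformation (target permitting):
//   t0 = add x, 8
//   v  = load t0
//   ... other uses of t0 ...
// becomes
//   v, t0 = pre_inc load x, 8
// where the other uses of the add are redirected to the written-back pointer
// value t0 produced by the indexed load.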
14976 
14977 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
14978                                    SDValue &BasePtr, SDValue &Offset,
14979                                    ISD::MemIndexedMode &AM,
14980                                    SelectionDAG &DAG,
14981                                    const TargetLowering &TLI) {
14982   if (PtrUse == N ||
14983       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
14984     return false;
14985 
14986   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
14987     return false;
14988 
  // Don't create an indexed load / store with zero offset.
14990   if (isNullConstant(Offset))
14991     return false;
14992 
14993   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
14994     return false;
14995 
14996   SmallPtrSet<const SDNode *, 32> Visited;
14997   for (SDNode *Use : BasePtr.getNode()->uses()) {
14998     if (Use == Ptr.getNode())
14999       continue;
15000 
    // Don't combine if there's a later user which could perform the indexing
    // instead.
15002     if (isa<MemSDNode>(Use)) {
15003       bool IsLoad = true;
15004       bool IsMasked = false;
15005       SDValue OtherPtr;
15006       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15007                                    IsMasked, OtherPtr, TLI)) {
15008         SmallVector<const SDNode *, 2> Worklist;
15009         Worklist.push_back(Use);
15010         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
15011           return false;
15012       }
15013     }
15014 
15015     // If all the uses are load / store addresses, then don't do the
15016     // transformation.
15017     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
15018       for (SDNode *UseUse : Use->uses())
15019         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
15020           return false;
15021     }
15022   }
15023   return true;
15024 }
15025 
15026 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
15027                                          bool &IsMasked, SDValue &Ptr,
15028                                          SDValue &BasePtr, SDValue &Offset,
15029                                          ISD::MemIndexedMode &AM,
15030                                          SelectionDAG &DAG,
15031                                          const TargetLowering &TLI) {
15032   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
15033                                 IsMasked, Ptr, TLI) ||
15034       Ptr.getNode()->hasOneUse())
15035     return nullptr;
15036 
15037   // Try turning it into a post-indexed load / store except when
15038   // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as addressing mode).
15040   // 2) Op must be independent of N, i.e. Op is neither a predecessor
15041   //    nor a successor of N. Otherwise, if Op is folded that would
15042   //    create a cycle.
15043   for (SDNode *Op : Ptr->uses()) {
15044     // Check for #1.
15045     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
15046       continue;
15047 
15048     // Check for #2.
15049     SmallPtrSet<const SDNode *, 32> Visited;
15050     SmallVector<const SDNode *, 8> Worklist;
15051     // Ptr is predecessor to both N and Op.
15052     Visited.insert(Ptr.getNode());
15053     Worklist.push_back(N);
15054     Worklist.push_back(Op);
15055     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
15056         !SDNode::hasPredecessorHelper(Op, Visited, Worklist))
15057       return Op;
15058   }
15059   return nullptr;
15060 }
15061 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its other uses are
/// redirected to the new load/store.
15066 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
15067   if (Level < AfterLegalizeDAG)
15068     return false;
15069 
15070   bool IsLoad = true;
15071   bool IsMasked = false;
15072   SDValue Ptr;
15073   SDValue BasePtr;
15074   SDValue Offset;
15075   ISD::MemIndexedMode AM = ISD::UNINDEXED;
15076   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
15077                                          Offset, AM, DAG, TLI);
15078   if (!Op)
15079     return false;
15080 
15081   SDValue Result;
15082   if (!IsMasked)
15083     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
15084                                          Offset, AM)
15085                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
15086                                           BasePtr, Offset, AM);
15087   else
15088     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
15089                                                BasePtr, Offset, AM)
15090                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
15091                                                 BasePtr, Offset, AM);
15092   ++PostIndexedNodes;
15093   ++NodesCombined;
15094   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
15095              dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
15096              dbgs() << '\n');
15097   WorklistRemover DeadNodes(*this);
15098   if (IsLoad) {
15099     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
15100     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
15101   } else {
15102     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
15103   }
15104 
15105   // Finally, since the node is now dead, remove it from the graph.
15106   deleteAndRecombine(N);
15107 
  // Replace the uses of Op with uses of the updated base value.
15109   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
15110                                 Result.getValue(IsLoad ? 1 : 0));
15111   deleteAndRecombine(Op);
15112   return true;
15113 }
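
// Illustrative shape of the post-indexed transformation (target permitting):
//   v  = load x
//   t0 = add x, 8
// becomes
//   v, t0 = post_inc load x, 8
// so the pointer update is folded into the memory access, and uses of the add
// are redirected to the load's written-back pointer result.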
15114 
15115 /// Return the base-pointer arithmetic from an indexed \p LD.
15116 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
15117   ISD::MemIndexedMode AM = LD->getAddressingMode();
15118   assert(AM != ISD::UNINDEXED);
15119   SDValue BP = LD->getOperand(1);
15120   SDValue Inc = LD->getOperand(2);
15121 
15122   // Some backends use TargetConstants for load offsets, but don't expect
15123   // TargetConstants in general ADD nodes. We can convert these constants into
15124   // regular Constants (if the constant is not opaque).
15125   assert((Inc.getOpcode() != ISD::TargetConstant ||
15126           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
15127          "Cannot split out indexing using opaque target constants");
15128   if (Inc.getOpcode() == ISD::TargetConstant) {
15129     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
15130     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
15131                           ConstInc->getValueType(0));
15132   }
15133 
15134   unsigned Opc =
15135       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
15136   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
15137 }
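
// For instance (illustrative only): for a pre-increment load with base BP and
// constant offset 16, this returns the node (add BP, 16), which can stand in
// for the load's written-back pointer result when the loaded value itself is
// dead.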
15138 
15139 static inline ElementCount numVectorEltsOrZero(EVT T) {
15140   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
15141 }
15142 
15143 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
15144   Val = ST->getValue();
15145   EVT STType = Val.getValueType();
15146   EVT STMemType = ST->getMemoryVT();
15147   if (STType == STMemType)
15148     return true;
15149   if (isTypeLegal(STMemType))
15150     return false; // fail.
15151   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
15152       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
15153     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
15154     return true;
15155   }
15156   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
15157       STType.isInteger() && STMemType.isInteger()) {
15158     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
15159     return true;
15160   }
15161   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
15162     Val = DAG.getBitcast(STMemType, Val);
15163     return true;
15164   }
15165   return false; // fail.
15166 }
15167 
15168 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
15169   EVT LDMemType = LD->getMemoryVT();
15170   EVT LDType = LD->getValueType(0);
15171   assert(Val.getValueType() == LDMemType &&
15172          "Attempting to extend value of non-matching type");
15173   if (LDType == LDMemType)
15174     return true;
15175   if (LDMemType.isInteger() && LDType.isInteger()) {
15176     switch (LD->getExtensionType()) {
15177     case ISD::NON_EXTLOAD:
15178       Val = DAG.getBitcast(LDType, Val);
15179       return true;
15180     case ISD::EXTLOAD:
15181       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
15182       return true;
15183     case ISD::SEXTLOAD:
15184       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
15185       return true;
15186     case ISD::ZEXTLOAD:
15187       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
15188       return true;
15189     }
15190   }
15191   return false;
15192 }
15193 
15194 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
15195   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
15196     return SDValue();
15197   SDValue Chain = LD->getOperand(0);
15198   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
15199   // TODO: Relax this restriction for unordered atomics (see D66309)
15200   if (!ST || !ST->isSimple())
15201     return SDValue();
15202 
15203   EVT LDType = LD->getValueType(0);
15204   EVT LDMemType = LD->getMemoryVT();
15205   EVT STMemType = ST->getMemoryVT();
15206   EVT STType = ST->getValue().getValueType();
15207 
15208   // There are two cases to consider here:
15209   //  1. The store is fixed width and the load is scalable. In this case we
15210   //     don't know at compile time if the store completely envelops the load
15211   //     so we abandon the optimisation.
15212   //  2. The store is scalable and the load is fixed width. We could
15213   //     potentially support a limited number of cases here, but there has been
15214   //     no cost-benefit analysis to prove it's worth it.
15215   bool LdStScalable = LDMemType.isScalableVector();
15216   if (LdStScalable != STMemType.isScalableVector())
15217     return SDValue();
15218 
15219   // If we are dealing with scalable vectors on a big endian platform the
15220   // calculation of offsets below becomes trickier, since we do not know at
15221   // compile time the absolute size of the vector. Until we've done more
15222   // analysis on big-endian platforms it seems better to bail out for now.
15223   if (LdStScalable && DAG.getDataLayout().isBigEndian())
15224     return SDValue();
15225 
15226   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
15227   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
15228   int64_t Offset;
15229   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
15230     return SDValue();
15231 
  // Normalize for endianness. After this, Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n-th least significant byte of the stored value.
15236   if (DAG.getDataLayout().isBigEndian())
15237     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() -
15238               (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) /
15239                  8 -
15240              Offset;
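  // For example (illustrative only): an 8-byte store and a 2-byte load whose
  // raw base-index offset is 6 on a big-endian target normalize to
  // Offset = (64 - 16) / 8 - 6 = 0, i.e. the load reads the least significant
  // bytes of the stored value.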
15241 
  // Check that the stored value covers all bits that are loaded.
15243   bool STCoversLD;
15244 
15245   TypeSize LdMemSize = LDMemType.getSizeInBits();
15246   TypeSize StMemSize = STMemType.getSizeInBits();
15247   if (LdStScalable)
15248     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
15249   else
15250     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <=
15251                                    StMemSize.getFixedSize());
15252 
15253   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
15254     if (LD->isIndexed()) {
15255       // Cannot handle opaque target constants and we must respect the user's
15256       // request not to split indexes from loads.
15257       if (!canSplitIdx(LD))
15258         return SDValue();
15259       SDValue Idx = SplitIndexingFromLoad(LD);
15260       SDValue Ops[] = {Val, Idx, Chain};
15261       return CombineTo(LD, Ops, 3);
15262     }
15263     return CombineTo(LD, Val, Chain);
15264   };
15265 
15266   if (!STCoversLD)
15267     return SDValue();
15268 
15269   // Memory as copy space (potentially masked).
15270   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
15271     // Simple case: Direct non-truncating forwarding
15272     if (LDType.getSizeInBits() == LdMemSize)
15273       return ReplaceLd(LD, ST->getValue(), Chain);
15274     // Can we model the truncate and extension with an and mask?
15275     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
15276         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
15277       // Mask to size of LDMemType
15278       auto Mask =
15279           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
15280                                                StMemSize.getFixedSize()),
15281                           SDLoc(ST), STType);
15282       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
15283       return ReplaceLd(LD, Val, Chain);
15284     }
15285   }
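  // For example (illustrative only): an i32 value stored with a truncating
  // i16 store and reloaded by an i16-to-i32 zextload is forwarded as
  // (and Val, 0xFFFF), avoiding the round-trip through memory.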
15286 
15287   // TODO: Deal with nonzero offset.
15288   if (LD->getBasePtr().isUndef() || Offset != 0)
15289     return SDValue();
  // Model necessary truncations / extensions.
  SDValue Val;
  // Truncate the value to the stored memory size.
15293   do {
15294     if (!getTruncatedStoreValue(ST, Val))
15295       continue;
15296     if (!isTypeLegal(LDMemType))
15297       continue;
15298     if (STMemType != LDMemType) {
15299       // TODO: Support vectors? This requires extract_subvector/bitcast.
15300       if (!STMemType.isVector() && !LDMemType.isVector() &&
15301           STMemType.isInteger() && LDMemType.isInteger())
15302         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
15303       else
15304         continue;
15305     }
15306     if (!extendLoadedValueToExtension(LD, Val))
15307       continue;
15308     return ReplaceLd(LD, Val, Chain);
15309   } while (false);
15310 
15311   // On failure, cleanup dead nodes we may have created.
15312   if (Val->use_empty())
15313     deleteAndRecombine(Val.getNode());
15314   return SDValue();
15315 }
15316 
15317 SDValue DAGCombiner::visitLOAD(SDNode *N) {
15318   LoadSDNode *LD  = cast<LoadSDNode>(N);
15319   SDValue Chain = LD->getChain();
15320   SDValue Ptr   = LD->getBasePtr();
15321 
15322   // If load is not volatile and there are no uses of the loaded value (and
15323   // the updated indexed value in case of indexed loads), change uses of the
15324   // chain value into uses of the chain input (i.e. delete the dead load).
15325   // TODO: Allow this for unordered atomics (see D66309)
15326   if (LD->isSimple()) {
15327     if (N->getValueType(1) == MVT::Other) {
15328       // Unindexed loads.
15329       if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here, e.g.:
15331         // v1, chain2 = load chain1, loc
15332         // v2, chain3 = load chain2, loc
15333         // v3         = add v2, c
15334         // Now we replace use of chain2 with chain1.  This makes the second load
15335         // isomorphic to the one we are deleting, and thus makes this load live.
15336         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
15337                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
15338                    dbgs() << "\n");
15339         WorklistRemover DeadNodes(*this);
15340         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15341         AddUsersToWorklist(Chain.getNode());
15342         if (N->use_empty())
15343           deleteAndRecombine(N);
15344 
15345         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15346       }
15347     } else {
15348       // Indexed loads.
15349       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
15350 
15351       // If this load has an opaque TargetConstant offset, then we cannot split
15352       // the indexing into an add/sub directly (that TargetConstant may not be
15353       // valid for a different type of node, and we cannot convert an opaque
15354       // target constant into a regular constant).
15355       bool CanSplitIdx = canSplitIdx(LD);
15356 
15357       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
15358         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
15359         SDValue Index;
15360         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
15361           Index = SplitIndexingFromLoad(LD);
15362           // Try to fold the base pointer arithmetic into subsequent loads and
15363           // stores.
15364           AddUsersToWorklist(N);
15365         } else
15366           Index = DAG.getUNDEF(N->getValueType(1));
15367         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
15368                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
15369                    dbgs() << " and 2 other values\n");
15370         WorklistRemover DeadNodes(*this);
15371         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
15372         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
15373         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
15374         deleteAndRecombine(N);
15375         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
15376       }
15377     }
15378   }
15379 
15380   // If this load is directly stored, replace the load value with the stored
15381   // value.
15382   if (auto V = ForwardStoreValueToDirectLoad(LD))
15383     return V;
15384 
15385   // Try to infer better alignment information than the load already has.
15386   if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
15387     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
15388       if (*Alignment > LD->getAlign() &&
15389           isAligned(*Alignment, LD->getSrcValueOffset())) {
15390         SDValue NewLoad = DAG.getExtLoad(
15391             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
15392             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
15393             LD->getMemOperand()->getFlags(), LD->getAAInfo());
15394         // NewLoad will always be N as we are only refining the alignment
15395         assert(NewLoad.getNode() == N);
15396         (void)NewLoad;
15397       }
15398     }
15399   }
15400 
15401   if (LD->isUnindexed()) {
15402     // Walk up chain skipping non-aliasing memory nodes.
15403     SDValue BetterChain = FindBetterChain(LD, Chain);
15404 
15405     // If there is a better chain.
15406     if (Chain != BetterChain) {
15407       SDValue ReplLoad;
15408 
      // Replace the chain to avoid the dependency.
15410       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
15411         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
15412                                BetterChain, Ptr, LD->getMemOperand());
15413       } else {
15414         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
15415                                   LD->getValueType(0),
15416                                   BetterChain, Ptr, LD->getMemoryVT(),
15417                                   LD->getMemOperand());
15418       }
15419 
15420       // Create token factor to keep old chain connected.
15421       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
15422                                   MVT::Other, Chain, ReplLoad.getValue(1));
15423 
15424       // Replace uses with load result and token factor
15425       return CombineTo(N, ReplLoad.getValue(0), Token);
15426     }
15427   }
15428 
15429   // Try transforming N to an indexed load.
15430   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15431     return SDValue(N, 0);
15432 
15433   // Try to slice up N to more direct loads if the slices are mapped to
15434   // different register banks or pairing can take place.
15435   if (SliceUpLoad(N))
15436     return SDValue(N, 0);
15437 
15438   return SDValue();
15439 }
15440 
15441 namespace {
15442 
15443 /// Helper structure used to slice a load in smaller loads.
15444 /// Basically a slice is obtained from the following sequence:
15445 /// Origin = load Ty1, Base
15446 /// Shift = srl Ty1 Origin, CstTy Amount
15447 /// Inst = trunc Shift to Ty2
15448 ///
15449 /// Then, it will be rewritten into:
15450 /// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
15452 ///
15453 /// SliceTy is deduced from the number of bits that are actually used to
15454 /// build Inst.
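///
/// As an illustrative example (assuming a little-endian target), the
/// sequence:
/// Origin = load i32, Base
/// Shift = srl i32 Origin, 16
/// Inst = trunc Shift to i8
/// uses only bits [16, 24) of the original value, so it can be rewritten
/// as:
/// Slice = load i8, Base + 2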
15455 struct LoadedSlice {
15456   /// Helper structure used to compute the cost of a slice.
15457   struct Cost {
15458     /// Are we optimizing for code size.
15459     bool ForCodeSize = false;
15460 
    /// Counts of the various operations that contribute to the cost.
15462     unsigned Loads = 0;
15463     unsigned Truncates = 0;
15464     unsigned CrossRegisterBanksCopies = 0;
15465     unsigned ZExts = 0;
15466     unsigned Shift = 0;
15467 
15468     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
15469 
15470     /// Get the cost of one isolated slice.
15471     Cost(const LoadedSlice &LS, bool ForCodeSize)
15472         : ForCodeSize(ForCodeSize), Loads(1) {
15473       EVT TruncType = LS.Inst->getValueType(0);
15474       EVT LoadedType = LS.getLoadedType();
15475       if (TruncType != LoadedType &&
15476           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
15477         ZExts = 1;
15478     }
15479 
    /// Account for the slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method grows the cost of the original
    /// load with the gain from this slice.
15484     void addSliceGain(const LoadedSlice &LS) {
15485       // Each slice saves a truncate.
15486       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
15487       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
15488                               LS.Inst->getValueType(0)))
15489         ++Truncates;
15490       // If there is a shift amount, this slice gets rid of it.
15491       if (LS.Shift)
15492         ++Shift;
15493       // If this slice can merge a cross register bank copy, account for it.
15494       if (LS.canMergeExpensiveCrossRegisterBankCopy())
15495         ++CrossRegisterBanksCopies;
15496     }
15497 
15498     Cost &operator+=(const Cost &RHS) {
15499       Loads += RHS.Loads;
15500       Truncates += RHS.Truncates;
15501       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
15502       ZExts += RHS.ZExts;
15503       Shift += RHS.Shift;
15504       return *this;
15505     }
15506 
15507     bool operator==(const Cost &RHS) const {
15508       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
15509              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
15510              ZExts == RHS.ZExts && Shift == RHS.Shift;
15511     }
15512 
15513     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
15514 
15515     bool operator<(const Cost &RHS) const {
      // Assume cross-register-bank copies are as expensive as loads.
15517       // FIXME: Do we want some more target hooks?
15518       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
15519       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
15520       // Unless we are optimizing for code size, consider the
15521       // expensive operation first.
15522       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
15523         return ExpensiveOpsLHS < ExpensiveOpsRHS;
15524       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
15525              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
15526     }
15527 
15528     bool operator>(const Cost &RHS) const { return RHS < *this; }
15529 
15530     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
15531 
15532     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
15533   };
15534 
  // The last instruction that represents the slice. This should be a
  // truncate instruction.
15537   SDNode *Inst;
15538 
15539   // The original load instruction.
15540   LoadSDNode *Origin;
15541 
15542   // The right shift amount in bits from the original load.
15543   unsigned Shift;
15544 
  // The DAG from which Origin came.
15546   // This is used to get some contextual information about legal types, etc.
15547   SelectionDAG *DAG;
15548 
15549   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
15550               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
15551       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
15552 
  /// Get the bits used in the original loaded chunk of bits.
  /// \return An APInt as wide as the original loaded value, with the used
  ///         bits set to 1 and the unused bits set to 0.
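  ///
  /// For example (illustrative): for an i32 origin with Shift == 16 and an
  /// i8 truncate, the result is a 32-bit APInt with value 0x00FF0000.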
15556   APInt getUsedBits() const {
15557     // Reproduce the trunc(lshr) sequence:
15558     // - Start from the truncated value.
15559     // - Zero extend to the desired bit width.
15560     // - Shift left.
15561     assert(Origin && "No original load to compare against.");
15562     unsigned BitWidth = Origin->getValueSizeInBits(0);
15563     assert(Inst && "This slice is not bound to an instruction");
15564     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
15565            "Extracted slice is bigger than the whole type!");
15566     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
15567     UsedBits.setAllBits();
15568     UsedBits = UsedBits.zext(BitWidth);
15569     UsedBits <<= Shift;
15570     return UsedBits;
15571   }
15572 
15573   /// Get the size of the slice to be loaded in bytes.
15574   unsigned getLoadedSize() const {
15575     unsigned SliceSize = getUsedBits().countPopulation();
15576     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
15577     return SliceSize / 8;
15578   }
15579 
15580   /// Get the type that will be loaded for this slice.
15581   /// Note: This may not be the final type for the slice.
15582   EVT getLoadedType() const {
15583     assert(DAG && "Missing context");
15584     LLVMContext &Ctxt = *DAG->getContext();
15585     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
15586   }
15587 
15588   /// Get the alignment of the load used for this slice.
15589   Align getAlign() const {
15590     Align Alignment = Origin->getAlign();
15591     uint64_t Offset = getOffsetFromBase();
15592     if (Offset != 0)
15593       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
15594     return Alignment;
15595   }
15596 
15597   /// Check if this slice can be rewritten with legal operations.
15598   bool isLegal() const {
15599     // An invalid slice is not legal.
15600     if (!Origin || !Inst || !DAG)
15601       return false;
15602 
    // Offsets are only used for indexed loads; we do not handle those.
15604     if (!Origin->getOffset().isUndef())
15605       return false;
15606 
15607     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15608 
15609     // Check that the type is legal.
15610     EVT SliceType = getLoadedType();
15611     if (!TLI.isTypeLegal(SliceType))
15612       return false;
15613 
15614     // Check that the load is legal for this type.
15615     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
15616       return false;
15617 
15618     // Check that the offset can be computed.
15619     // 1. Check its type.
15620     EVT PtrType = Origin->getBasePtr().getValueType();
15621     if (PtrType == MVT::Untyped || PtrType.isExtended())
15622       return false;
15623 
15624     // 2. Check that it fits in the immediate.
15625     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
15626       return false;
15627 
15628     // 3. Check that the computation is legal.
15629     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
15630       return false;
15631 
15632     // Check that the zext is legal if it needs one.
15633     EVT TruncateType = Inst->getValueType(0);
15634     if (TruncateType != SliceType &&
15635         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
15636       return false;
15637 
15638     return true;
15639   }
15640 
15641   /// Get the offset in bytes of this slice in the original chunk of
15642   /// bits.
15643   /// \pre DAG != nullptr.
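  ///
  /// For example (illustrative): with an i32 origin, Shift == 16, and a
  /// one-byte slice, the offset is 2 on a little-endian target and
  /// 4 - 2 - 1 == 1 on a big-endian target.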
15644   uint64_t getOffsetFromBase() const {
15645     assert(DAG && "Missing context.");
15646     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
15648     uint64_t Offset = Shift / 8;
15649     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
15650     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
15651            "The size of the original loaded type is not a multiple of a"
15652            " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized away earlier in the process.
15655     assert(TySizeInBytes > Offset &&
15656            "Invalid shift amount for given loaded size");
15657     if (IsBigEndian)
15658       Offset = TySizeInBytes - Offset - getLoadedSize();
15659     return Offset;
15660   }
15661 
15662   /// Generate the sequence of instructions to load the slice
15663   /// represented by this object and redirect the uses of this slice to
15664   /// this new sequence of instructions.
15665   /// \pre this->Inst && this->Origin are valid Instructions and this
15666   /// object passed the legal check: LoadedSlice::isLegal returned true.
15667   /// \return The last instruction of the sequence used to load the slice.
15668   SDValue loadSlice() const {
15669     assert(Inst && Origin && "Unable to replace a non-existing slice.");
15670     const SDValue &OldBaseAddr = Origin->getBasePtr();
15671     SDValue BaseAddr = OldBaseAddr;
15672     // Get the offset in that chunk of bytes w.r.t. the endianness.
15673     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
15674     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
15675     if (Offset) {
15676       // BaseAddr = BaseAddr + Offset.
15677       EVT ArithType = BaseAddr.getValueType();
15678       SDLoc DL(Origin);
15679       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
15680                               DAG->getConstant(Offset, DL, ArithType));
15681     }
15682 
15683     // Create the type of the loaded slice according to its size.
15684     EVT SliceType = getLoadedType();
15685 
15686     // Create the load for the slice.
15687     SDValue LastInst =
15688         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
15689                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
15690                      Origin->getMemOperand()->getFlags());
15691     // If the final type is not the same as the loaded type, this means that
15692     // we have to pad with zero. Create a zero extend for that.
15693     EVT FinalType = Inst->getValueType(0);
15694     if (SliceType != FinalType)
15695       LastInst =
15696           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
15697     return LastInst;
15698   }
15699 
15700   /// Check if this slice can be merged with an expensive cross register
15701   /// bank copy. E.g.,
15702   /// i = load i32
15703   /// f = bitcast i32 i to float
15704   bool canMergeExpensiveCrossRegisterBankCopy() const {
15705     if (!Inst || !Inst->hasOneUse())
15706       return false;
15707     SDNode *Use = *Inst->use_begin();
15708     if (Use->getOpcode() != ISD::BITCAST)
15709       return false;
15710     assert(DAG && "Missing context");
15711     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
15712     EVT ResVT = Use->getValueType(0);
15713     const TargetRegisterClass *ResRC =
15714         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
15715     const TargetRegisterClass *ArgRC =
15716         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
15717                            Use->getOperand(0)->isDivergent());
15718     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
15719       return false;
15720 
15721     // At this point, we know that we perform a cross-register-bank copy.
15722     // Check if it is expensive.
15723     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
15724     // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common subclass.
15726     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
15727       return false;
15728 
15729     // Check if it will be merged with the load.
15730     // 1. Check the alignment constraint.
15731     Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
15732         ResVT.getTypeForEVT(*DAG->getContext()));
15733 
15734     if (RequiredAlignment > getAlign())
15735       return false;
15736 
15737     // 2. Check that the load is a legal operation for that type.
15738     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
15739       return false;
15740 
15741     // 3. Check that we do not have a zext in the way.
15742     if (Inst->getValueType(0) != getLoadedType())
15743       return false;
15744 
15745     return true;
15746   }
15747 };
15748 
15749 } // end anonymous namespace
15750 
15751 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
15752 /// \p UsedBits looks like 0..0 1..1 0..0.
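///
/// For example (illustrative), 0x00FFFF00 is dense while 0x00FF00FF is not,
/// because the latter has a hole between its two used byte runs.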
15753 static bool areUsedBitsDense(const APInt &UsedBits) {
15754   // If all the bits are one, this is dense!
15755   if (UsedBits.isAllOnesValue())
15756     return true;
15757 
15758   // Get rid of the unused bits on the right.
15759   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
15760   // Get rid of the unused bits on the left.
15761   if (NarrowedUsedBits.countLeadingZeros())
15762     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
15763   // Check that the chunk of bits is completely used.
15764   return NarrowedUsedBits.isAllOnesValue();
15765 }
15766 
15767 /// Check whether or not \p First and \p Second are next to each other
15768 /// in memory. This means that there is no hole between the bits loaded
15769 /// by \p First and the bits loaded by \p Second.
15770 static bool areSlicesNextToEachOther(const LoadedSlice &First,
15771                                      const LoadedSlice &Second) {
15772   assert(First.Origin == Second.Origin && First.Origin &&
15773          "Unable to match different memory origins.");
15774   APInt UsedBits = First.getUsedBits();
15775   assert((UsedBits & Second.getUsedBits()) == 0 &&
15776          "Slices are not supposed to overlap.");
15777   UsedBits |= Second.getUsedBits();
15778   return areUsedBitsDense(UsedBits);
15779 }
15780 
/// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there are in the slices in \p LoadedSlices.
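///
/// For example (illustrative): if two i16 slices are adjacent in memory and
/// the target supports a paired load for i16 at the required alignment, one
/// load is subtracted from \p GlobalLSCost.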
15785 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15786                                  LoadedSlice::Cost &GlobalLSCost) {
15787   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
15789   if (NumberOfSlices < 2)
15790     return;
15791 
15792   // Sort the slices so that elements that are likely to be next to each
15793   // other in memory are next to each other in the list.
15794   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
15795     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
15796     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
15797   });
15798   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
15801   const LoadedSlice *First = nullptr;
15802   const LoadedSlice *Second = nullptr;
15803   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
15804                 // Set the beginning of the pair.
15805                                                            First = Second) {
15806     Second = &LoadedSlices[CurrSlice];
15807 
15808     // If First is NULL, it means we start a new pair.
15809     // Get to the next slice.
15810     if (!First)
15811       continue;
15812 
15813     EVT LoadedType = First->getLoadedType();
15814 
15815     // If the types of the slices are different, we cannot pair them.
15816     if (LoadedType != Second->getLoadedType())
15817       continue;
15818 
15819     // Check if the target supplies paired loads for this type.
15820     Align RequiredAlignment;
15821     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
15823       Second = nullptr;
15824       continue;
15825     }
15826     // Check if we meet the alignment requirement.
15827     if (First->getAlign() < RequiredAlignment)
15828       continue;
15829 
15830     // Check that both loads are next to each other in memory.
15831     if (!areSlicesNextToEachOther(*First, *Second))
15832       continue;
15833 
15834     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
15835     --GlobalLSCost.Loads;
15836     // Move to the next pair.
15837     Second = nullptr;
15838   }
15839 }
15840 
15841 /// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
15843 /// involved slices (1) which are (2) next to each other in memory, and
15844 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
15845 ///
15846 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
15847 /// the elements themselves.
15848 ///
/// FIXME: When the cost model is mature enough, we can relax
/// constraints (1) and (2).
15851 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
15852                                 const APInt &UsedBits, bool ForCodeSize) {
15853   unsigned NumberOfSlices = LoadedSlices.size();
15854   if (StressLoadSlicing)
15855     return NumberOfSlices > 1;
15856 
15857   // Check (1).
15858   if (NumberOfSlices != 2)
15859     return false;
15860 
15861   // Check (2).
15862   if (!areUsedBitsDense(UsedBits))
15863     return false;
15864 
15865   // Check (3).
15866   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
15867   // The original code has one big load.
15868   OrigCost.Loads = 1;
15869   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
15870     const LoadedSlice &LS = LoadedSlices[CurrSlice];
15871     // Accumulate the cost of all the slices.
15872     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
15873     GlobalSlicingCost += SliceCost;
15874 
    // Account for the gain obtained with the current slice as a cost
    // in the original configuration.
15877     OrigCost.addSliceGain(LS);
15878   }
15879 
  // If the target supports paired loads, adjust the cost accordingly.
15881   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
15882   return OrigCost > GlobalSlicingCost;
15883 }
15884 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
15887 ///
15888 /// This sort of thing is introduced by SROA.
15889 /// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
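///
/// For example (an illustrative pattern, typically produced by SROA, shown
/// for a little-endian target):
/// LD = load i32, Base
/// A = trunc LD to i16
/// B = trunc (srl LD, 16) to i16
/// may be rewritten as two independent loads:
/// A = load i16, Base
/// B = load i16, Base + 2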
15891 bool DAGCombiner::SliceUpLoad(SDNode *N) {
15892   if (Level < AfterLegalizeDAG)
15893     return false;
15894 
15895   LoadSDNode *LD = cast<LoadSDNode>(N);
15896   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
15897       !LD->getValueType(0).isInteger())
15898     return false;
15899 
15900   // The algorithm to split up a load of a scalable vector into individual
15901   // elements currently requires knowing the length of the loaded type,
15902   // so will need adjusting to work on scalable vectors.
15903   if (LD->getValueType(0).isScalableVector())
15904     return false;
15905 
15906   // Keep track of already used bits to detect overlapping values.
15907   // In that case, we will just abort the transformation.
15908   APInt UsedBits(LD->getValueSizeInBits(0), 0);
15909 
15910   SmallVector<LoadedSlice, 4> LoadedSlices;
15911 
15912   // Check if this load is used as several smaller chunks of bits.
15913   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
15914   // of computation for each trunc.
15915   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
15916        UI != UIEnd; ++UI) {
15917     // Skip the uses of the chain.
15918     if (UI.getUse().getResNo() != 0)
15919       continue;
15920 
15921     SDNode *User = *UI;
15922     unsigned Shift = 0;
15923 
15924     // Check if this is a trunc(lshr).
15925     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
15926         isa<ConstantSDNode>(User->getOperand(1))) {
15927       Shift = User->getConstantOperandVal(1);
15928       User = *User->use_begin();
15929     }
15930 
    // At this point, User is a truncate iff we encountered either a plain
    // trunc or a trunc(lshr) sequence.
15933     if (User->getOpcode() != ISD::TRUNCATE)
15934       return false;
15935 
    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
    // the slice would straddle byte boundaries, which we do not support.
15940     unsigned Width = User->getValueSizeInBits(0);
15941     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
15942       return false;
15943 
15944     // Build the slice for this chain of computations.
15945     LoadedSlice LS(User, LD, Shift, &DAG);
15946     APInt CurrentUsedBits = LS.getUsedBits();
15947 
15948     // Check if this slice overlaps with another.
15949     if ((CurrentUsedBits & UsedBits) != 0)
15950       return false;
15951     // Update the bits used globally.
15952     UsedBits |= CurrentUsedBits;
15953 
15954     // Check if the new slice would be legal.
15955     if (!LS.isLegal())
15956       return false;
15957 
15958     // Record the slice.
15959     LoadedSlices.push_back(LS);
15960   }
15961 
15962   // Abort slicing if it does not seem to be profitable.
15963   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
15964     return false;
15965 
15966   ++SlicedLoads;
15967 
15968   // Rewrite each chain to use an independent load.
15969   // By construction, each chain can be represented by a unique load.
15970 
15971   // Prepare the argument for the new token factor for all the slices.
15972   SmallVector<SDValue, 8> ArgChains;
15973   for (SmallVectorImpl<LoadedSlice>::const_iterator
15974            LSIt = LoadedSlices.begin(),
15975            LSItEnd = LoadedSlices.end();
15976        LSIt != LSItEnd; ++LSIt) {
15977     SDValue SliceInst = LSIt->loadSlice();
15978     CombineTo(LSIt->Inst, SliceInst, true);
15979     if (SliceInst.getOpcode() != ISD::LOAD)
15980       SliceInst = SliceInst.getOperand(0);
15981     assert(SliceInst->getOpcode() == ISD::LOAD &&
15982            "It takes more than a zext to get to the loaded slice!!");
15983     ArgChains.push_back(SliceInst.getValue(1));
15984   }
15985 
15986   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
15987                               ArgChains);
15988   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
15989   AddToWorklist(Chain.getNode());
15990   return true;
15991 }
15992 
/// Check to see if V is (and load (ptr), imm), where the AND clears out
/// specific bytes of the loaded value.  If so, return the number of bytes
/// being masked out and the byte shift amount.
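///
/// For example (illustrative): for an i32 load masked with 0xFFFF00FF, only
/// byte 1 is being cleared, so the result is (1, 1): one byte masked out,
/// one byte up from the least significant byte.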
15996 static std::pair<unsigned, unsigned>
15997 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
15998   std::pair<unsigned, unsigned> Result(0, 0);
15999 
16000   // Check for the structure we're looking for.
16001   if (V->getOpcode() != ISD::AND ||
16002       !isa<ConstantSDNode>(V->getOperand(1)) ||
16003       !ISD::isNormalLoad(V->getOperand(0).getNode()))
16004     return Result;
16005 
16006   // Check the chain and pointer.
16007   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
16008   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
16009 
16010   // This only handles simple types.
16011   if (V.getValueType() != MVT::i16 &&
16012       V.getValueType() != MVT::i32 &&
16013       V.getValueType() != MVT::i64)
16014     return Result;
16015 
16016   // Check the constant mask.  Invert it so that the bits being masked out are
16017   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
16018   // follow the sign bit for uniformity.
16019   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
16020   unsigned NotMaskLZ = countLeadingZeros(NotMask);
16021   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
16022   unsigned NotMaskTZ = countTrailingZeros(NotMask);
16023   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
16024   if (NotMaskLZ == 64) return Result;  // All zero mask.
16025 
  // See if we have a contiguous run of bits.  If so, we have 0*1+0*.
16027   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
16028     return Result;
16029 
16030   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
16031   if (V.getValueType() != MVT::i64 && NotMaskLZ)
16032     NotMaskLZ -= 64-V.getValueSizeInBits();
16033 
16034   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
16035   switch (MaskedBytes) {
16036   case 1:
16037   case 2:
16038   case 4: break;
16039   default: return Result; // All one mask, or 5-byte mask.
16040   }
16041 
  // Verify that the masked region starts at a byte offset that is a multiple
  // of the mask width, so the narrowed access is aligned the same as the
  // access width.
16044   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
16045 
  // For narrowing to be valid, the load must be the memory operation
  // immediately preceding the store.
16048   if (LD == Chain.getNode())
16049     ; // ok.
16050   else if (Chain->getOpcode() == ISD::TokenFactor &&
16051            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
16053     if (!LD->isOperandOf(Chain.getNode()))
16054       return Result;
16055   } else
16056     return Result; // Fail.
16057 
16058   Result.first = MaskedBytes;
16059   Result.second = NotMaskTZ/8;
16060   return Result;
16061 }
16062 
16063 /// Check to see if IVal is something that provides a value as specified by
16064 /// MaskInfo. If so, replace the specified store with a narrower store of
16065 /// truncated IVal.
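///
/// Continuing the illustrative example above: with MaskInfo == (1, 1) and
/// IVal known to be zero outside bits [8, 16), the store becomes a one-byte
/// store of (trunc (srl IVal, 8)) at offset 1 on a little-endian target.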
16066 static SDValue
16067 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
16068                                 SDValue IVal, StoreSDNode *St,
16069                                 DAGCombiner *DC) {
16070   unsigned NumBytes = MaskInfo.first;
16071   unsigned ByteShift = MaskInfo.second;
16072   SelectionDAG &DAG = DC->getDAG();
16073 
16074   // Check to see if IVal is all zeros in the part being masked in by the 'or'
16075   // that uses this.  If not, this is not a replacement.
16076   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
16077                                   ByteShift*8, (ByteShift+NumBytes)*8);
16078   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
16079 
16080   // Check that it is legal on the target to do this.  It is legal if the new
16081   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
16082   // legalization (and the target doesn't explicitly think this is a bad idea).
16083   MVT VT = MVT::getIntegerVT(NumBytes * 8);
16084   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16085   if (!DC->isTypeLegal(VT))
16086     return SDValue();
16087   if (St->getMemOperand() &&
16088       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
16089                               *St->getMemOperand()))
16090     return SDValue();
16091 
16092   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
16093   // shifted by ByteShift and truncated down to NumBytes.
16094   if (ByteShift) {
16095     SDLoc DL(IVal);
16096     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
16097                        DAG.getConstant(ByteShift*8, DL,
16098                                     DC->getShiftAmountTy(IVal.getValueType())));
16099   }
16100 
16101   // Figure out the offset for the store and the alignment of the access.
16102   unsigned StOffset;
16103   if (DAG.getDataLayout().isLittleEndian())
16104     StOffset = ByteShift;
16105   else
16106     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
16107 
16108   SDValue Ptr = St->getBasePtr();
16109   if (StOffset) {
16110     SDLoc DL(IVal);
16111     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
16112   }
16113 
16114   // Truncate down to the new size.
16115   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
16116 
16117   ++OpsNarrowed;
16118   return DAG
16119       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
16120                 St->getPointerInfo().getWithOffset(StOffset),
16121                 St->getOriginalAlign());
16122 }
16123 
16124 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
16125 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
16126 /// narrowing the load and store if it would end up being a win for performance
16127 /// or code size.
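///
/// For example (illustrative, little-endian): in
/// store (or (load i32, p), 0x00FF0000), p
/// the 'or' only touches byte 2, so the sequence can be narrowed to
/// store (or (load i8, p + 2), 0xFF), p + 2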
16128 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
16129   StoreSDNode *ST  = cast<StoreSDNode>(N);
16130   if (!ST->isSimple())
16131     return SDValue();
16132 
16133   SDValue Chain = ST->getChain();
16134   SDValue Value = ST->getValue();
16135   SDValue Ptr   = ST->getBasePtr();
16136   EVT VT = Value.getValueType();
16137 
16138   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
16139     return SDValue();
16140 
16141   unsigned Opc = Value.getOpcode();
16142 
16143   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
16144   // is a byte mask indicating a consecutive number of bytes, check to see if
16145   // Y is known to provide just those bytes.  If so, we try to replace the
16146   // load + replace + store sequence with a single (narrower) store, which makes
16147   // the load dead.
16148   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
16149     std::pair<unsigned, unsigned> MaskedLoad;
16150     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
16151     if (MaskedLoad.first)
16152       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16153                                                   Value.getOperand(1), ST,this))
16154         return NewST;
16155 
16156     // Or is commutative, so try swapping X and Y.
16157     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
16158     if (MaskedLoad.first)
16159       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
16160                                                   Value.getOperand(0), ST,this))
16161         return NewST;
16162   }
16163 
16164   if (!EnableReduceLoadOpStoreWidth)
16165     return SDValue();
16166 
16167   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
16168       Value.getOperand(1).getOpcode() != ISD::Constant)
16169     return SDValue();
16170 
16171   SDValue N0 = Value.getOperand(0);
16172   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
16173       Chain == SDValue(N0.getNode(), 1)) {
16174     LoadSDNode *LD = cast<LoadSDNode>(N0);
16175     if (LD->getBasePtr() != Ptr ||
16176         LD->getPointerInfo().getAddrSpace() !=
16177         ST->getPointerInfo().getAddrSpace())
16178       return SDValue();
16179 
    // Find the type to narrow the load / op / store to.
16181     SDValue N1 = Value.getOperand(1);
16182     unsigned BitWidth = N1.getValueSizeInBits();
16183     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
16184     if (Opc == ISD::AND)
16185       Imm ^= APInt::getAllOnesValue(BitWidth);
16186     if (Imm == 0 || Imm.isAllOnesValue())
16187       return SDValue();
16188     unsigned ShAmt = Imm.countTrailingZeros();
16189     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
16190     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
16191     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16192     // The narrowing should be profitable, the load/store operation should be
16193     // legal (or custom) and the store size should be equal to the NewVT width.
16194     while (NewBW < BitWidth &&
16195            (NewVT.getStoreSizeInBits() != NewBW ||
16196             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
16197             !TLI.isNarrowingProfitable(VT, NewVT))) {
16198       NewBW = NextPowerOf2(NewBW);
16199       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
16200     }
16201     if (NewBW >= BitWidth)
16202       return SDValue();
16203 
    // If the changed lsb does not start at a boundary of the new type's
    // width, round down to the previous boundary.
16206     if (ShAmt % NewBW)
16207       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
16208     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
16209                                    std::min(BitWidth, ShAmt + NewBW));
16210     if ((Imm & Mask) == Imm) {
16211       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
16212       if (Opc == ISD::AND)
16213         NewImm ^= APInt::getAllOnesValue(NewBW);
16214       uint64_t PtrOff = ShAmt / 8;
16215       // For big endian targets, we need to adjust the offset to the pointer to
16216       // load the correct bytes.
16217       if (DAG.getDataLayout().isBigEndian())
16218         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
16219 
16220       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
16221       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
16222       if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
16223         return SDValue();
16224 
16225       SDValue NewPtr =
16226           DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
16227       SDValue NewLD =
16228           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
16229                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
16230                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
16231       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
16232                                    DAG.getConstant(NewImm, SDLoc(Value),
16233                                                    NewVT));
16234       SDValue NewST =
16235           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
16236                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
16237 
16238       AddToWorklist(NewPtr.getNode());
16239       AddToWorklist(NewLD.getNode());
16240       AddToWorklist(NewVal.getNode());
16241       WorklistRemover DeadNodes(*this);
16242       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
16243       ++OpsNarrowed;
16244       return NewST;
16245     }
16246   }
16247 
16248   return SDValue();
16249 }
16250 
16251 /// For a given floating point load / store pair, if the load value isn't used
16252 /// by any other operations, then consider transforming the pair to integer
16253 /// load / store operations if the target deems the transformation profitable.
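///
/// For example (illustrative): a copy such as
/// store float (load float, p), q
/// may become
/// store i32 (load i32, p), q
/// avoiding a round trip through the floating-point register bank.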
16254 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
16255   StoreSDNode *ST  = cast<StoreSDNode>(N);
16256   SDValue Value = ST->getValue();
16257   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
16258       Value.hasOneUse()) {
16259     LoadSDNode *LD = cast<LoadSDNode>(Value);
16260     EVT VT = LD->getMemoryVT();
16261     if (!VT.isFloatingPoint() ||
16262         VT != ST->getMemoryVT() ||
16263         LD->isNonTemporal() ||
16264         ST->isNonTemporal() ||
16265         LD->getPointerInfo().getAddrSpace() != 0 ||
16266         ST->getPointerInfo().getAddrSpace() != 0)
16267       return SDValue();
16268 
16269     TypeSize VTSize = VT.getSizeInBits();
16270 
16271     // We don't know the size of scalable types at compile time so we cannot
16272     // create an integer of the equivalent size.
16273     if (VTSize.isScalable())
16274       return SDValue();
16275 
16276     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
16277     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
16278         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
16279         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
16280         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
16281       return SDValue();
16282 
16283     Align LDAlign = LD->getAlign();
16284     Align STAlign = ST->getAlign();
16285     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
16286     Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
16287     if (LDAlign < ABIAlign || STAlign < ABIAlign)
16288       return SDValue();
16289 
16290     SDValue NewLD =
16291         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
16292                     LD->getPointerInfo(), LDAlign);
16293 
16294     SDValue NewST =
16295         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
16296                      ST->getPointerInfo(), STAlign);
16297 
16298     AddToWorklist(NewLD.getNode());
16299     AddToWorklist(NewST.getNode());
16300     WorklistRemover DeadNodes(*this);
16301     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
16302     ++LdStFP2Int;
16303     return NewST;
16304   }
16305 
16306   return SDValue();
16307 }
16308 
16309 // This is a helper function for visitMUL to check the profitability
16310 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
16311 // MulNode is the original multiply, AddNode is (add x, c1),
16312 // and ConstNode is c2.
16313 //
16314 // If the (add x, c1) has multiple uses, we could increase
16315 // the number of adds if we make this transformation.
16316 // It would only be worth doing this if we can remove a
16317 // multiply in the process. Check for that here.
16318 // To illustrate:
16319 //     (A + c1) * c3
16320 //     (A + c2) * c3
16321 // We're checking for cases where we have common "c3 * A" expressions.
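//
// For example (illustrative): given (x + 1) * 5 and a second use
// (x + 2) * 5, folding both to (x * 5) + 5 and (x * 5) + 10 exposes a
// common (x * 5) that can be reused, removing a multiply.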
16322 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
16323                                               SDValue &AddNode,
16324                                               SDValue &ConstNode) {
16325   APInt Val;
16326 
16327   // If the add only has one use, this would be OK to do.
16328   if (AddNode.getNode()->hasOneUse())
16329     return true;
16330 
16331   // Walk all the users of the constant with which we're multiplying.
16332   for (SDNode *Use : ConstNode->uses()) {
16333     if (Use == MulNode) // This use is the one we're on right now. Skip it.
16334       continue;
16335 
16336     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
16337       SDNode *OtherOp;
16338       SDNode *MulVar = AddNode.getOperand(0).getNode();
16339 
16340       // OtherOp is what we're multiplying against the constant.
16341       if (Use->getOperand(0) == ConstNode)
16342         OtherOp = Use->getOperand(1).getNode();
16343       else
16344         OtherOp = Use->getOperand(0).getNode();
16345 
16346       // Check to see if multiply is with the same operand of our "add".
16347       //
16348       //     ConstNode  = CONST
16349       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
16350       //     ...
16351       //     AddNode  = (A + c1)  <-- MulVar is A.
      //         = AddNode * ConstNode   <-- currently visited instruction.
16353       //
16354       // If we make this transformation, we will have a common
16355       // multiply (ConstNode * A) that we can save.
16356       if (OtherOp == MulVar)
16357         return true;
16358 
16359       // Now check to see if a future expansion will give us a common
16360       // multiply.
16361       //
16362       //     ConstNode  = CONST
16363       //     AddNode    = (A + c1)
      //     ...   = AddNode * ConstNode <-- currently visited instruction.
16365       //     ...
16366       //     OtherOp = (A + c2)
16367       //     Use     = OtherOp * ConstNode <-- visiting Use.
16368       //
16369       // If we make this transformation, we will have a common
16370       // multiply (CONST * A) after we also do the same transformation
      // to the "Use" instruction.
16372       if (OtherOp->getOpcode() == ISD::ADD &&
16373           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
16374           OtherOp->getOperand(0).getNode() == MulVar)
16375         return true;
16376     }
16377   }
16378 
16379   // Didn't find a case where this would be profitable.
16380   return false;
16381 }
16382 
16383 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
16384                                          unsigned NumStores) {
16385   SmallVector<SDValue, 8> Chains;
16386   SmallPtrSet<const SDNode *, 8> Visited;
16387   SDLoc StoreDL(StoreNodes[0].MemNode);
16388 
16389   for (unsigned i = 0; i < NumStores; ++i) {
16390     Visited.insert(StoreNodes[i].MemNode);
16391   }
16392 
  // Don't include chains that are one of the candidate stores themselves or
  // that have already been added.
16394   for (unsigned i = 0; i < NumStores; ++i) {
16395     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
16396       Chains.push_back(StoreNodes[i].MemNode->getChain());
16397   }
16398 
  assert(!Chains.empty() && "Merged stores should have at least one chain");
16400   return DAG.getTokenFactor(StoreDL, Chains);
16401 }
16402 
16403 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
16404     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
16405     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
16406   // Make sure we have something to merge.
16407   if (NumStores < 2)
16408     return false;
16409 
  // Use the debug location of the first store for the merged nodes.
16411   SDLoc DL(StoreNodes[0].MemNode);
16412 
16413   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
16414   unsigned SizeInBits = NumStores * ElementSizeBits;
16415   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16416 
16417   EVT StoreTy;
16418   if (UseVector) {
16419     unsigned Elts = NumStores * NumMemElts;
16420     // Get the type for the merged vector store.
16421     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16422   } else
16423     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
16424 
16425   SDValue StoredVal;
16426   if (UseVector) {
16427     if (IsConstantSrc) {
16428       SmallVector<SDValue, 8> BuildVector;
16429       for (unsigned I = 0; I != NumStores; ++I) {
16430         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
16431         SDValue Val = St->getValue();
16432         // If constant is of the wrong type, convert it now.
16433         if (MemVT != Val.getValueType()) {
16434           Val = peekThroughBitcasts(Val);
16435           // Deal with constants of wrong size.
16436           if (ElementSizeBits != Val.getValueSizeInBits()) {
16437             EVT IntMemVT =
16438                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
16439             if (isa<ConstantFPSDNode>(Val)) {
16440               // Not clear how to truncate FP values.
16441               return false;
16442             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
16443               Val = DAG.getConstant(C->getAPIntValue()
16444                                         .zextOrTrunc(Val.getValueSizeInBits())
16445                                         .zextOrTrunc(ElementSizeBits),
16446                                     SDLoc(C), IntMemVT);
16447           }
          // Bitcast the value so it ends up with the correct (memory) type.
16449           Val = DAG.getBitcast(MemVT, Val);
16450         }
16451         BuildVector.push_back(Val);
16452       }
16453       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16454                                                : ISD::BUILD_VECTOR,
16455                               DL, StoreTy, BuildVector);
16456     } else {
16457       SmallVector<SDValue, 8> Ops;
16458       for (unsigned i = 0; i < NumStores; ++i) {
16459         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
16460         SDValue Val = peekThroughBitcasts(St->getValue());
16461         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
16462         // type MemVT. If the underlying value is not the correct
16463         // type, but it is an extraction of an appropriate vector we
16464         // can recast Val to be of the correct type. This may require
16465         // converting between EXTRACT_VECTOR_ELT and
16466         // EXTRACT_SUBVECTOR.
16467         if ((MemVT != Val.getValueType()) &&
16468             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
16469              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
16470           EVT MemVTScalarTy = MemVT.getScalarType();
16471           // We may need to add a bitcast here to get types to line up.
16472           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
16473             Val = DAG.getBitcast(MemVT, Val);
16474           } else {
16475             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
16476                                             : ISD::EXTRACT_VECTOR_ELT;
16477             SDValue Vec = Val.getOperand(0);
16478             SDValue Idx = Val.getOperand(1);
16479             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
16480           }
16481         }
16482         Ops.push_back(Val);
16483       }
16484 
16485       // Build the extracted vector elements back into a vector.
16486       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
16487                                                : ISD::BUILD_VECTOR,
16488                               DL, StoreTy, Ops);
16489     }
16490   } else {
16491     // We should always use a vector store when merging extracted vector
16492     // elements, so this path implies a store of constants.
16493     assert(IsConstantSrc && "Merged vector elements should use vector store");
16494 
16495     APInt StoreInt(SizeInBits, 0);
16496 
16497     // Construct a single integer constant which is made of the smaller
16498     // constant inputs.
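    //
    // For example (illustrative): four i8 stores of 0x11, 0x22, 0x33, 0x44
    // at increasing addresses merge into a single i32 store of 0x44332211
    // on a little-endian target.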
16499     bool IsLE = DAG.getDataLayout().isLittleEndian();
16500     for (unsigned i = 0; i < NumStores; ++i) {
16501       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
16502       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
16503 
16504       SDValue Val = St->getValue();
16505       Val = peekThroughBitcasts(Val);
16506       StoreInt <<= ElementSizeBits;
16507       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
16508         StoreInt |= C->getAPIntValue()
16509                         .zextOrTrunc(ElementSizeBits)
16510                         .zextOrTrunc(SizeInBits);
16511       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
16512         StoreInt |= C->getValueAPF()
16513                         .bitcastToAPInt()
16514                         .zextOrTrunc(ElementSizeBits)
16515                         .zextOrTrunc(SizeInBits);
        // If fp truncation is necessary, give up for now.
16517         if (MemVT.getSizeInBits() != ElementSizeBits)
16518           return false;
16519       } else {
16520         llvm_unreachable("Invalid constant element type");
16521       }
16522     }
16523 
    // Create the single merged constant to store.
16525     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
16526   }
16527 
16528   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16529   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
16530 
  // Make sure we use a truncating store if that is necessary for legality.
16532   SDValue NewStore;
16533   if (!UseTrunc) {
16534     NewStore =
16535         DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
16536                      FirstInChain->getPointerInfo(), FirstInChain->getAlign());
16537   } else { // Must be realized as a trunc store
16538     EVT LegalizedStoredValTy =
16539         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
16540     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
16541     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
16542     SDValue ExtendedStoreVal =
16543         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
16544                         LegalizedStoredValTy);
16545     NewStore = DAG.getTruncStore(
16546         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
16547         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
16548         FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
16549   }
16550 
16551   // Replace all merged stores with the new store.
16552   for (unsigned i = 0; i < NumStores; ++i)
16553     CombineTo(StoreNodes[i].MemNode, NewStore);
16554 
16555   AddToWorklist(NewChain.getNode());
16556   return true;
16557 }
16558 
16559 void DAGCombiner::getStoreMergeCandidates(
16560     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
16561     SDNode *&RootNode) {
16562   // This holds the base pointer, index, and the offset in bytes from the base
16563   // pointer. We must have a base and an offset. Do not handle stores to undef
16564   // base pointers.
16565   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
16566   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
16567     return;
16568 
16569   SDValue Val = peekThroughBitcasts(St->getValue());
16570   StoreSource StoreSrc = getStoreSource(Val);
16571   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
16572 
  // If the stored value comes from a load, match on the load's base pointer.
16574   EVT MemVT = St->getMemoryVT();
16575   BaseIndexOffset LBasePtr;
16576   EVT LoadVT;
16577   if (StoreSrc == StoreSource::Load) {
16578     auto *Ld = cast<LoadSDNode>(Val);
16579     LBasePtr = BaseIndexOffset::match(Ld, DAG);
16580     LoadVT = Ld->getMemoryVT();
16581     // Load and store should be the same type.
16582     if (MemVT != LoadVT)
16583       return;
16584     // Loads must only have one use.
16585     if (!Ld->hasNUsesOfValue(1, 0))
16586       return;
16587     // The memory operands must not be volatile/indexed/atomic.
16588     // TODO: May be able to relax for unordered atomics (see D66309)
16589     if (!Ld->isSimple() || Ld->isIndexed())
16590       return;
16591   }
16592   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
16593                             int64_t &Offset) -> bool {
16594     // The memory operands must not be volatile/indexed/atomic.
16595     // TODO: May be able to relax for unordered atomics (see D66309)
16596     if (!Other->isSimple() || Other->isIndexed())
16597       return false;
16598     // Don't mix temporal stores with non-temporal stores.
16599     if (St->isNonTemporal() != Other->isNonTemporal())
16600       return false;
16601     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
16602     // Allow merging constants of different types as integers.
16603     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
16604                                            : Other->getMemoryVT() != MemVT;
16605     switch (StoreSrc) {
16606     case StoreSource::Load: {
16607       if (NoTypeMatch)
16608         return false;
16609       // The Load's Base Ptr must also match.
16610       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
16611       if (!OtherLd)
16612         return false;
16613       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
16614       if (LoadVT != OtherLd->getMemoryVT())
16615         return false;
16616       // Loads must only have one use.
16617       if (!OtherLd->hasNUsesOfValue(1, 0))
16618         return false;
16619       // The memory operands must not be volatile/indexed/atomic.
16620       // TODO: May be able to relax for unordered atomics (see D66309)
16621       if (!OtherLd->isSimple() || OtherLd->isIndexed())
16622         return false;
16623       // Don't mix temporal loads with non-temporal loads.
16624       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
16625         return false;
16626       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
16627         return false;
16628       break;
16629     }
16630     case StoreSource::Constant:
16631       if (NoTypeMatch)
16632         return false;
16633       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
16634         return false;
16635       break;
16636     case StoreSource::Extract:
16637       // Do not merge truncated stores here.
16638       if (Other->isTruncatingStore())
16639         return false;
16640       if (!MemVT.bitsEq(OtherBC.getValueType()))
16641         return false;
16642       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
16643           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
16644         return false;
16645       break;
16646     default:
16647       llvm_unreachable("Unhandled store source for merging");
16648     }
16649     Ptr = BaseIndexOffset::match(Other, DAG);
16650     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
16651   };
16652 
  // Check whether the pair of this StoreNode and the RootNode has already
  // bailed out of the dependence check more times than the limit allows.
16655   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
16656                                         SDNode *RootNode) -> bool {
16657     auto RootCount = StoreRootCountMap.find(StoreNode);
16658     return RootCount != StoreRootCountMap.end() &&
16659            RootCount->second.first == RootNode &&
16660            RootCount->second.second > StoreMergeDependenceLimit;
16661   };
16662 
16663   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
16664     // This must be a chain use.
16665     if (UseIter.getOperandNo() != 0)
16666       return;
16667     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
16668       BaseIndexOffset Ptr;
16669       int64_t PtrDiff;
16670       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
16671           !OverLimitInDependenceCheck(OtherStore, RootNode))
16672         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
16673     }
16674   };
16675 
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
16682   //
16683   // Root
16684   // |-------|-------|
16685   // Load    Load    Store3
16686   // |       |
16687   // Store1   Store2
16688   //
16689   // FIXME: We should be able to climb and
16690   // descend TokenFactors to find candidates as well.
16691 
16692   RootNode = St->getChain().getNode();
16693 
16694   unsigned NumNodesExplored = 0;
16695   const unsigned MaxSearchNodes = 1024;
16696   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
16697     RootNode = Ldn->getChain().getNode();
16698     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16699          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
16700       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
16701         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
16702           TryToAddCandidate(I2);
16703       }
16704     }
16705   } else {
16706     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
16707          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
16708       TryToAddCandidate(I);
16709   }
16710 }
16711 
16712 // We need to check that merging these stores does not cause a loop in
16713 // the DAG. Any store candidate may depend on another candidate
16714 // indirectly through its operand (we already consider dependencies
16715 // through the chain). Check in parallel by searching up from
16716 // non-chain operands of candidates.
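//
// For example (an illustrative sketch, not a real DAG): if the value
// stored by St2 is a load whose chain passes through St1, then merging
// St1 and St2 would make the merged store both a chain successor of St1
// and a value successor of the load, i.e. part of a cycle:
//
//   St1 --(chain)--> Ld --(value)--> St2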
16717 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
16718     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
16719     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
16724 
16725   SmallPtrSet<const SDNode *, 32> Visited;
16726   SmallVector<const SDNode *, 8> Worklist;
16727 
  // RootNode is a predecessor to all candidates so we need not search
  // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards the size check.
16731 
16732   Worklist.push_back(RootNode);
16733   while (!Worklist.empty()) {
16734     auto N = Worklist.pop_back_val();
16735     if (!Visited.insert(N).second)
16736       continue; // Already present in Visited.
16737     if (N->getOpcode() == ISD::TokenFactor) {
16738       for (SDValue Op : N->ops())
16739         Worklist.push_back(Op.getNode());
16740     }
16741   }
16742 
16743   // Don't count pruning nodes towards max.
16744   unsigned int Max = 1024 + Visited.size();
16745   // Search Ops of store candidates.
16746   for (unsigned i = 0; i < NumStores; ++i) {
16747     SDNode *N = StoreNodes[i].MemNode;
    // Of the 4 Store Operands:
    //   * Chain (Op 0) -> We have already considered these
    //                     in candidate selection, so they can be
    //                     safely ignored
    //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
    //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via indexed store).
    //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
    //               non-indexed stores). Not constant on all targets (e.g. ARM)
    //               and so can participate in a cycle.
16759     for (unsigned j = 1; j < N->getNumOperands(); ++j)
16760       Worklist.push_back(N->getOperand(j).getNode());
16761   }
16762   // Search through DAG. We can stop early if we find a store node.
16763   for (unsigned i = 0; i < NumStores; ++i)
16764     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
16765                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen this pair more times than the
      // limit, we won't add the StoreNode to the StoreNodes set again.
16769       if (Visited.size() >= Max) {
16770         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
16771         if (RootCount.first == RootNode)
16772           RootCount.second++;
16773         else
16774           RootCount = {RootNode, 1};
16775       }
16776       return false;
16777     }
16778   return true;
16779 }
16780 
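// Illustrative sketch of the scan below (hypothetical offsets, assuming
// ElementSizeBytes == 1): for sorted OffsetFromBase values {0, 1, 2, 8} the
// run {0, 1, 2} is consecutive and 3 is returned, leaving the store at
// offset 8 in StoreNodes for a later attempt; for {0, 0, 1} the overlapping
// store at the front is trimmed and the run {0, 1} is found.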
16781 unsigned
16782 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
16783                                   int64_t ElementSizeBytes) const {
16784   while (true) {
16785     // Find a store past the width of the first store.
16786     size_t StartIdx = 0;
16787     while ((StartIdx + 1 < StoreNodes.size()) &&
16788            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
16789               StoreNodes[StartIdx + 1].OffsetFromBase)
16790       ++StartIdx;
16791 
16792     // Bail if we don't have enough candidates to merge.
16793     if (StartIdx + 1 >= StoreNodes.size())
16794       return 0;
16795 
16796     // Trim stores that overlapped with the first store.
16797     if (StartIdx)
16798       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
16799 
16800     // Scan the memory operations on the chain and find the first
16801     // non-consecutive store memory address.
16802     unsigned NumConsecutiveStores = 1;
16803     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
16804     // Check that the addresses are consecutive starting from the second
16805     // element in the list of stores.
16806     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
16807       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
16808       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
16809         break;
16810       NumConsecutiveStores = i + 1;
16811     }
16812     if (NumConsecutiveStores > 1)
16813       return NumConsecutiveStores;
16814 
16815     // There are no consecutive stores at the start of the list.
16816     // Remove the first store and try again.
16817     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
16818   }
16819 }
16820 
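// A sketch of the transform below (assuming a little-endian target where i32
// is legal and the four i8 stores are adjacent):
//
//   store i8 1, p+0
//   store i8 2, p+1
//   store i8 3, p+2
//   store i8 4, p+3
//     -->
//   store i32 0x04030201, p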
16821 bool DAGCombiner::tryStoreMergeOfConstants(
16822     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16823     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
16824   LLVMContext &Context = *DAG.getContext();
16825   const DataLayout &DL = DAG.getDataLayout();
16826   int64_t ElementSizeBytes = MemVT.getStoreSize();
16827   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16828   bool MadeChange = false;
16829 
16830   // Store the constants into memory as one consecutive store.
16831   while (NumConsecutiveStores >= 2) {
16832     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16833     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16834     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16835     unsigned LastLegalType = 1;
16836     unsigned LastLegalVectorType = 1;
16837     bool LastIntegerTrunc = false;
16838     bool NonZero = false;
16839     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
16840     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16841       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
16842       SDValue StoredVal = ST->getValue();
16843       bool IsElementZero = false;
16844       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
16845         IsElementZero = C->isNullValue();
16846       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
16847         IsElementZero = C->getConstantFPValue()->isNullValue();
16848       if (IsElementZero) {
16849         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
16850           FirstZeroAfterNonZero = i;
16851       }
16852       NonZero |= !IsElementZero;
16853 
16854       // Find a legal type for the constant store.
16855       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
16856       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
16857       bool IsFast = false;
16858 
16859       // Break early when size is too large to be legal.
16860       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
16861         break;
16862 
16863       if (TLI.isTypeLegal(StoreTy) &&
16864           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
16865           TLI.allowsMemoryAccess(Context, DL, StoreTy,
16866                                  *FirstInChain->getMemOperand(), &IsFast) &&
16867           IsFast) {
16868         LastIntegerTrunc = false;
16869         LastLegalType = i + 1;
16870         // Or check whether a truncstore is legal.
16871       } else if (TLI.getTypeAction(Context, StoreTy) ==
16872                  TargetLowering::TypePromoteInteger) {
16873         EVT LegalizedStoredValTy =
16874             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
16875         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
16876             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
16877             TLI.allowsMemoryAccess(Context, DL, StoreTy,
16878                                    *FirstInChain->getMemOperand(), &IsFast) &&
16879             IsFast) {
16880           LastIntegerTrunc = true;
16881           LastLegalType = i + 1;
16882         }
16883       }
16884 
16885       // We only use vectors if the constant is known to be zero or the
16886       // target allows it and the function is not marked with the
16887       // noimplicitfloat attribute.
16888       if ((!NonZero ||
16889            TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
16890           AllowVectors) {
16891         // Find a legal type for the vector store.
16892         unsigned Elts = (i + 1) * NumMemElts;
16893         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16894         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
16895             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16896             TLI.allowsMemoryAccess(Context, DL, Ty,
16897                                    *FirstInChain->getMemOperand(), &IsFast) &&
16898             IsFast)
16899           LastLegalVectorType = i + 1;
16900       }
16901     }
16902 
16903     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
16904     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
16905 
16906     // Check if we found a legal integer type that creates a meaningful
16907     // merge.
16908     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved or we've dropped a non-zero value. Drop as many
      // candidates as we can here.
16916       unsigned NumSkip = 1;
16917       while ((NumSkip < NumConsecutiveStores) &&
16918              (NumSkip < FirstZeroAfterNonZero) &&
16919              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16920         NumSkip++;
16921 
16922       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16923       NumConsecutiveStores -= NumSkip;
16924       continue;
16925     }
16926 
16927     // Check that we can merge these candidates without causing a cycle.
16928     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16929                                                   RootNode)) {
16930       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16931       NumConsecutiveStores -= NumElem;
16932       continue;
16933     }
16934 
16935     MadeChange |= mergeStoresOfConstantsOrVecElts(
16936         StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
16937 
16938     // Remove merged stores for next iteration.
16939     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16940     NumConsecutiveStores -= NumElem;
16941   }
16942   return MadeChange;
16943 }
16944 
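// A sketch of the transform below (assuming v2f32 values extracted from the
// same v4f32 source and a target where a v4f32 store is legal and fast):
//
//   store (extract_subvector %v, 0), p+0
//   store (extract_subvector %v, 2), p+8
//     -->
//   store %v, p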
16945 bool DAGCombiner::tryStoreMergeOfExtracts(
16946     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
16947     EVT MemVT, SDNode *RootNode) {
16948   LLVMContext &Context = *DAG.getContext();
16949   const DataLayout &DL = DAG.getDataLayout();
16950   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
16951   bool MadeChange = false;
16952 
  // Loop over the run of consecutive stores, merging while possible.
16954   while (NumConsecutiveStores >= 2) {
16955     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
16956     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
16957     unsigned FirstStoreAlign = FirstInChain->getAlignment();
16958     unsigned NumStoresToMerge = 1;
16959     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
16960       // Find a legal type for the vector store.
16961       unsigned Elts = (i + 1) * NumMemElts;
16962       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
16963       bool IsFast = false;
16964 
16965       // Break early when size is too large to be legal.
16966       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
16967         break;
16968 
16969       if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
16970           TLI.allowsMemoryAccess(Context, DL, Ty,
16971                                  *FirstInChain->getMemOperand(), &IsFast) &&
16972           IsFast)
16973         NumStoresToMerge = i + 1;
16974     }
16975 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
16978     if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have, is if the alignment has
      // improved. Drop as many candidates as we can here.
16985       unsigned NumSkip = 1;
16986       while ((NumSkip < NumConsecutiveStores) &&
16987              (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16988         NumSkip++;
16989 
16990       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16991       NumConsecutiveStores -= NumSkip;
16992       continue;
16993     }
16994 
16995     // Check that we can merge these candidates without causing a cycle.
16996     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
16997                                                   RootNode)) {
16998       StoreNodes.erase(StoreNodes.begin(),
16999                        StoreNodes.begin() + NumStoresToMerge);
17000       NumConsecutiveStores -= NumStoresToMerge;
17001       continue;
17002     }
17003 
17004     MadeChange |= mergeStoresOfConstantsOrVecElts(
17005         StoreNodes, MemVT, NumStoresToMerge, false, true, false);
17006 
17007     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
17008     NumConsecutiveStores -= NumStoresToMerge;
17009   }
17010   return MadeChange;
17011 }
17012 
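// A sketch of the transform below (assuming i16 loads and stores, both
// sequences consecutive, and a target where i32 loads/stores are legal and
// fast):
//
//   %a = load i16, q+0        store i16 %a, p+0
//   %b = load i16, q+2        store i16 %b, p+2
//     -->
//   %w = load i32, q          store i32 %w, p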
17013 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
17014                                        unsigned NumConsecutiveStores, EVT MemVT,
17015                                        SDNode *RootNode, bool AllowVectors,
17016                                        bool IsNonTemporalStore,
17017                                        bool IsNonTemporalLoad) {
17018   LLVMContext &Context = *DAG.getContext();
17019   const DataLayout &DL = DAG.getDataLayout();
17020   int64_t ElementSizeBytes = MemVT.getStoreSize();
17021   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
17022   bool MadeChange = false;
17023 
17024   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
17025 
17026   // Look for load nodes which are used by the stored values.
17027   SmallVector<MemOpLink, 8> LoadNodes;
17028 
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, or indexed, and they must be consecutive.
17031   BaseIndexOffset LdBasePtr;
17032 
17033   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
17034     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
17035     SDValue Val = peekThroughBitcasts(St->getValue());
17036     LoadSDNode *Ld = cast<LoadSDNode>(Val);
17037 
17038     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    // If this is not the first pointer that we check, it must share the base.
17040     int64_t LdOffset = 0;
17041     if (LdBasePtr.getBase().getNode()) {
17042       // The base ptr must be the same.
17043       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
17044         break;
17045     } else {
      // Remember this base pointer; all later loads must match it.
17047       LdBasePtr = LdPtr;
17048     }
17049 
17050     // We found a potential memory operand to merge.
17051     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
17052   }
17053 
17054   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
17055     Align RequiredAlignment;
17056     bool NeedRotate = false;
17057     if (LoadNodes.size() == 2) {
17058       // If we have load/store pair instructions and we only have two values,
17059       // don't bother merging.
17060       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
17061           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
17062         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
17063         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
17064         break;
17065       }
17066       // If the loads are reversed, see if we can rotate the halves into place.
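      // For example (sketch, assuming 4-byte elements, little-endian, and a
      // legal i64 rotate): if the stores to q+0 and q+4 take their values
      // from the loads at p+4 and p+0 respectively, we can instead do
      //   %w = load i64, p
      //   store i64 (rotl %w, 32), q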
17067       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
17068       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
17069       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
17070       if (Offset0 - Offset1 == ElementSizeBytes &&
17071           (hasOperation(ISD::ROTL, PairVT) ||
17072            hasOperation(ISD::ROTR, PairVT))) {
17073         std::swap(LoadNodes[0], LoadNodes[1]);
17074         NeedRotate = true;
17075       }
17076     }
17077     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
17078     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
17079     Align FirstStoreAlign = FirstInChain->getAlign();
17080     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
17081 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. These variables hold the index in
    // the load node array.
17085 
17086     unsigned LastConsecutiveLoad = 1;
17087 
    // These variables refer to sizes (element counts), not indices in the array.
17089     unsigned LastLegalVectorType = 1;
17090     unsigned LastLegalIntegerType = 1;
17091     bool isDereferenceable = true;
17092     bool DoIntegerTruncate = false;
17093     StartAddress = LoadNodes[0].OffsetFromBase;
17094     SDValue LoadChain = FirstLoad->getChain();
17095     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
17096       // All loads must share the same chain.
17097       if (LoadNodes[i].MemNode->getChain() != LoadChain)
17098         break;
17099 
17100       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
17101       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
17102         break;
17103       LastConsecutiveLoad = i;
17104 
17105       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
17106         isDereferenceable = false;
17107 
17108       // Find a legal type for the vector store.
17109       unsigned Elts = (i + 1) * NumMemElts;
17110       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17111 
17112       // Break early when size is too large to be legal.
17113       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
17114         break;
17115 
17116       bool IsFastSt = false;
17117       bool IsFastLd = false;
17118       if (TLI.isTypeLegal(StoreTy) &&
17119           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17120           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17121                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17122           IsFastSt &&
17123           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17124                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17125           IsFastLd) {
17126         LastLegalVectorType = i + 1;
17127       }
17128 
17129       // Find a legal type for the integer store.
17130       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
17131       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
17132       if (TLI.isTypeLegal(StoreTy) &&
17133           TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
17134           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17135                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
17136           IsFastSt &&
17137           TLI.allowsMemoryAccess(Context, DL, StoreTy,
17138                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
17139           IsFastLd) {
17140         LastLegalIntegerType = i + 1;
17141         DoIntegerTruncate = false;
17142         // Or check whether a truncstore and extload is legal.
17143       } else if (TLI.getTypeAction(Context, StoreTy) ==
17144                  TargetLowering::TypePromoteInteger) {
17145         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
17146         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
17147             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
17148             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17149             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
17150             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
17151             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17152                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
17153             IsFastSt &&
17154             TLI.allowsMemoryAccess(Context, DL, StoreTy,
17155                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
17156             IsFastLd) {
17157           LastLegalIntegerType = i + 1;
17158           DoIntegerTruncate = true;
17159         }
17160       }
17161     }
17162 
17163     // Only use vector types if the vector type is larger than the integer
17164     // type. If they are the same, use integers.
17165     bool UseVectorTy =
17166         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
17167     unsigned LastLegalType =
17168         std::max(LastLegalVectorType, LastLegalIntegerType);
17169 
    // We add +1 here because the LastXXX variables refer to an index
    // while NumElem refers to a count of elements.
17172     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
17173     NumElem = std::min(LastLegalType, NumElem);
17174     Align FirstLoadAlign = FirstLoad->getAlign();
17175 
17176     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment of either
      // the load or store has improved. Drop as many candidates as we
      // can here.
17184       unsigned NumSkip = 1;
17185       while ((NumSkip < LoadNodes.size()) &&
17186              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
17187              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
17188         NumSkip++;
17189       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
17190       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
17191       NumConsecutiveStores -= NumSkip;
17192       continue;
17193     }
17194 
17195     // Check that we can merge these candidates without causing a cycle.
17196     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
17197                                                   RootNode)) {
17198       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17199       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17200       NumConsecutiveStores -= NumElem;
17201       continue;
17202     }
17203 
    // Decide whether it is better to use vectors or integers to load and
    // store to memory.
17206     EVT JointMemOpVT;
17207     if (UseVectorTy) {
17208       // Find a legal type for the vector store.
17209       unsigned Elts = NumElem * NumMemElts;
17210       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
17211     } else {
17212       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
17213       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
17214     }
17215 
17216     SDLoc LoadDL(LoadNodes[0].MemNode);
17217     SDLoc StoreDL(StoreNodes[0].MemNode);
17218 
17219     // The merged loads are required to have the same incoming chain, so
17220     // using the first's chain is acceptable.
17221 
17222     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
17223     AddToWorklist(NewStoreChain.getNode());
17224 
17225     MachineMemOperand::Flags LdMMOFlags =
17226         isDereferenceable ? MachineMemOperand::MODereferenceable
17227                           : MachineMemOperand::MONone;
17228     if (IsNonTemporalLoad)
17229       LdMMOFlags |= MachineMemOperand::MONonTemporal;
17230 
17231     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
17232                                               ? MachineMemOperand::MONonTemporal
17233                                               : MachineMemOperand::MONone;
17234 
17235     SDValue NewLoad, NewStore;
17236     if (UseVectorTy || !DoIntegerTruncate) {
17237       NewLoad = DAG.getLoad(
17238           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
17239           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
17240       SDValue StoreOp = NewLoad;
17241       if (NeedRotate) {
17242         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
17243         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
17244                "Unexpected type for rotate-able load pair");
17245         SDValue RotAmt =
17246             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
17247         // Target can convert to the identical ROTR if it does not have ROTL.
17248         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
17249       }
17250       NewStore = DAG.getStore(
17251           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
17252           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
17253     } else { // This must be the truncstore/extload case
17254       EVT ExtendedTy =
17255           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
17256       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
17257                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
17258                                FirstLoad->getPointerInfo(), JointMemOpVT,
17259                                FirstLoadAlign, LdMMOFlags);
17260       NewStore = DAG.getTruncStore(
17261           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
17262           FirstInChain->getPointerInfo(), JointMemOpVT,
17263           FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
17264     }
17265 
17266     // Transfer chain users from old loads to the new load.
17267     for (unsigned i = 0; i < NumElem; ++i) {
17268       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
17269       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
17270                                     SDValue(NewLoad.getNode(), 1));
17271     }
17272 
17273     // Replace all stores with the new store. Recursively remove corresponding
17274     // values if they are no longer used.
17275     for (unsigned i = 0; i < NumElem; ++i) {
17276       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
17277       CombineTo(StoreNodes[i].MemNode, NewStore);
17278       if (Val.getNode()->use_empty())
17279         recursivelyDeleteUnusedNodes(Val.getNode());
17280     }
17281 
17282     MadeChange = true;
17283     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
17284     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
17285     NumConsecutiveStores -= NumElem;
17286   }
17287   return MadeChange;
17288 }
17289 
17290 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
17291   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
17292     return false;
17293 
17294   // TODO: Extend this function to merge stores of scalable vectors.
17295   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
17296   // store since we know <vscale x 16 x i8> is exactly twice as large as
17297   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
17298   EVT MemVT = St->getMemoryVT();
17299   if (MemVT.isScalableVector())
17300     return false;
17301   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
17302     return false;
17303 
17304   // This function cannot currently deal with non-byte-sized memory sizes.
17305   int64_t ElementSizeBytes = MemVT.getStoreSize();
17306   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
17307     return false;
17308 
17309   // Do not bother looking at stored values that are not constants, loads, or
17310   // extracted vector elements.
17311   SDValue StoredVal = peekThroughBitcasts(St->getValue());
17312   const StoreSource StoreSrc = getStoreSource(StoredVal);
17313   if (StoreSrc == StoreSource::Unknown)
17314     return false;
17315 
17316   SmallVector<MemOpLink, 8> StoreNodes;
17317   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain sub-DAG.
17319   getStoreMergeCandidates(St, StoreNodes, RootNode);
17320 
17321   // Check if there is anything to merge.
17322   if (StoreNodes.size() < 2)
17323     return false;
17324 
17325   // Sort the memory operands according to their distance from the
17326   // base pointer.
17327   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
17328     return LHS.OffsetFromBase < RHS.OffsetFromBase;
17329   });
17330 
17331   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
17332       Attribute::NoImplicitFloat);
17333   bool IsNonTemporalStore = St->isNonTemporal();
17334   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
17335                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
17336 
  // Store merging attempts to merge the lowest-addressed stores first. This
  // generally works out: if the first merge succeeds, the remaining stores
  // are checked again afterwards. However, in the case that a non-mergeable
  // store is found first, e.g., {p[-2], p[0], p[1], p[2], p[3]}, we would
  // fail and miss the subsequent mergeable cases. To prevent this, we prune
  // such stores from the front of StoreNodes here.
17344   bool MadeChange = false;
17345   while (StoreNodes.size() > 1) {
17346     unsigned NumConsecutiveStores =
17347         getConsecutiveStores(StoreNodes, ElementSizeBytes);
17348     // There are no more stores in the list to examine.
17349     if (NumConsecutiveStores == 0)
17350       return MadeChange;
17351 
17352     // We have at least 2 consecutive stores. Try to merge them.
17353     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
17354     switch (StoreSrc) {
17355     case StoreSource::Constant:
17356       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
17357                                              MemVT, RootNode, AllowVectors);
17358       break;
17359 
17360     case StoreSource::Extract:
17361       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
17362                                             MemVT, RootNode);
17363       break;
17364 
17365     case StoreSource::Load:
17366       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
17367                                          MemVT, RootNode, AllowVectors,
17368                                          IsNonTemporalStore, IsNonTemporalLoad);
17369       break;
17370 
17371     default:
17372       llvm_unreachable("Unhandled store source type");
17373     }
17374   }
17375   return MadeChange;
17376 }
17377 
17378 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
17379   SDLoc SL(ST);
17380   SDValue ReplStore;
17381 
17382   // Replace the chain to avoid dependency.
17383   if (ST->isTruncatingStore()) {
17384     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
17385                                   ST->getBasePtr(), ST->getMemoryVT(),
17386                                   ST->getMemOperand());
17387   } else {
17388     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
17389                              ST->getMemOperand());
17390   }
17391 
17392   // Create token to keep both nodes around.
17393   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
17394                               MVT::Other, ST->getChain(), ReplStore);
17395 
17396   // Make sure the new and old chains are cleaned up.
17397   AddToWorklist(Token.getNode());
17398 
17399   // Don't add users to work list.
17400   return CombineTo(ST, Token, false);
17401 }
17402 
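// A sketch of the transforms below (illustrative values): "store float 1.0, p"
// becomes "store i32 0x3F800000, p". When i64 is not legal but 32-bit integer
// stores are, "store double 1.0, p" becomes two stores of the bit-pattern
// halves of 0x3FF0000000000000 (shown for little-endian):
//   store i32 0x00000000, p
//   store i32 0x3FF00000, p+4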
17403 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
17404   SDValue Value = ST->getValue();
17405   if (Value.getOpcode() == ISD::TargetConstantFP)
17406     return SDValue();
17407 
17408   if (!ISD::isNormalStore(ST))
17409     return SDValue();
17410 
17411   SDLoc DL(ST);
17412 
17413   SDValue Chain = ST->getChain();
17414   SDValue Ptr = ST->getBasePtr();
17415 
17416   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
17417 
17418   // NOTE: If the original store is volatile, this transform must not increase
17419   // the number of stores.  For example, on x86-32 an f64 can be stored in one
17420   // processor operation but an i64 (which is not legal) requires two.  So the
17421   // transform should not be done in this case.
17422 
17423   SDValue Tmp;
17424   switch (CFP->getSimpleValueType(0).SimpleTy) {
17425   default:
17426     llvm_unreachable("Unknown FP type");
17427   case MVT::f16:    // We don't do this for these yet.
17428   case MVT::f80:
17429   case MVT::f128:
17430   case MVT::ppcf128:
17431     return SDValue();
17432   case MVT::f32:
17433     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
17434         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17435       ;
17436       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
17437                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
17438                             MVT::i32);
17439       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
17440     }
17441 
17442     return SDValue();
17443   case MVT::f64:
17444     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
17445          ST->isSimple()) ||
17446         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
17447       ;
17448       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
17449                             getZExtValue(), SDLoc(CFP), MVT::i64);
17450       return DAG.getStore(Chain, DL, Tmp,
17451                           Ptr, ST->getMemOperand());
17452     }
17453 
17454     if (ST->isSimple() &&
17455         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
17456       // Many FP stores are not made apparent until after legalize, e.g. for
17457       // argument passing.  Since this is so common, custom legalize the
17458       // 64-bit integer store into two 32-bit stores.
17459       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
17460       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
17461       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
17462       if (DAG.getDataLayout().isBigEndian())
17463         std::swap(Lo, Hi);
17464 
17465       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17466       AAMDNodes AAInfo = ST->getAAInfo();
17467 
17468       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17469                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17470       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL);
17471       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
17472                                  ST->getPointerInfo().getWithOffset(4),
17473                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
17474       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
17475                          St0, St1);
17476     }
17477 
17478     return SDValue();
17479   }
17480 }
17481 
17482 SDValue DAGCombiner::visitSTORE(SDNode *N) {
17483   StoreSDNode *ST  = cast<StoreSDNode>(N);
17484   SDValue Chain = ST->getChain();
17485   SDValue Value = ST->getValue();
17486   SDValue Ptr   = ST->getBasePtr();
17487 
17488   // If this is a store of a bit convert, store the input value if the
17489   // resultant store does not need a higher alignment than the original.
17490   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
17491       ST->isUnindexed()) {
17492     EVT SVT = Value.getOperand(0).getValueType();
17493     // If the store is volatile, we only want to change the store type if the
17494     // resulting store is legal. Otherwise we might increase the number of
17495     // memory accesses. We don't care if the original type was legal or not
17496     // as we assume software couldn't rely on the number of accesses of an
17497     // illegal type.
17498     // TODO: May be able to relax for unordered atomics (see D66309)
17499     if (((!LegalOperations && ST->isSimple()) ||
17500          TLI.isOperationLegal(ISD::STORE, SVT)) &&
17501         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
17502                                      DAG, *ST->getMemOperand())) {
17503       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
17504                           ST->getMemOperand());
17505     }
17506   }
17507 
17508   // Turn 'store undef, Ptr' -> nothing.
17509   if (Value.isUndef() && ST->isUnindexed())
17510     return Chain;
17511 
17512   // Try to infer better alignment information than the store already has.
17513   if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) {
17514     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
17515       if (*Alignment > ST->getAlign() &&
17516           isAligned(*Alignment, ST->getSrcValueOffset())) {
17517         SDValue NewStore =
17518             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
17519                               ST->getMemoryVT(), *Alignment,
17520                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
17521         // NewStore will always be N as we are only refining the alignment
17522         assert(NewStore.getNode() == N);
17523         (void)NewStore;
17524       }
17525     }
17526   }
17527 
17528   // Try transforming a pair floating point load / store ops to integer
17529   // load / store ops.
17530   if (SDValue NewST = TransformFPLoadStorePair(N))
17531     return NewST;
17532 
17533   // Try transforming several stores into STORE (BSWAP).
17534   if (SDValue Store = mergeTruncStores(ST))
17535     return Store;
17536 
17537   if (ST->isUnindexed()) {
17538     // Walk up chain skipping non-aliasing memory nodes, on this store and any
17539     // adjacent stores.
17540     if (findBetterNeighborChains(ST)) {
17541       // replaceStoreChain uses CombineTo, which handled all of the worklist
17542       // manipulation. Return the original node to not do anything else.
17543       return SDValue(ST, 0);
17544     }
17545     Chain = ST->getChain();
17546   }
17547 
17548   // FIXME: is there such a thing as a truncating indexed store?
17549   if (ST->isTruncatingStore() && ST->isUnindexed() &&
17550       Value.getValueType().isInteger() &&
17551       (!isa<ConstantSDNode>(Value) ||
17552        !cast<ConstantSDNode>(Value)->isOpaque())) {
17553     APInt TruncDemandedBits =
17554         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
17555                              ST->getMemoryVT().getScalarSizeInBits());
17556 
17557     // See if we can simplify the input to this truncstore with knowledge that
17558     // only the low bits are being used.  For example:
17559     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
17560     AddToWorklist(Value.getNode());
17561     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
17562       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
17563                                ST->getMemOperand());
17564 
17565     // Otherwise, see if we can simplify the operation with
17566     // SimplifyDemandedBits, which only works if the value has a single use.
17567     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will add
      // Value's node back to the worklist if necessary, but we also need to
      // re-visit the Store node itself.
17572       if (N->getOpcode() != ISD::DELETED_NODE)
17573         AddToWorklist(N);
17574       return SDValue(N, 0);
17575     }
17576   }
17577 
17578   // If this is a load followed by a store to the same location, then the store
17579   // is dead/noop.
17580   // TODO: Can relax for unordered atomics (see D66309)
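  // For example (sketch): in "%v = load i32, p ... store i32 %v, p", if no
  // side effect intervenes on the chain, the store is a no-op and is removed.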
17581   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
17582     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
17583         ST->isUnindexed() && ST->isSimple() &&
17584         // There can't be any side effects between the load and store, such as
17585         // a call or store.
17586         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
17587       // The store is dead, remove it.
17588       return Chain;
17589     }
17590   }
17591 
17592   // TODO: Can relax for unordered atomics (see D66309)
17593   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
17594     if (ST->isUnindexed() && ST->isSimple() &&
17595         ST1->isUnindexed() && ST1->isSimple()) {
17596       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
17597           ST->getMemoryVT() == ST1->getMemoryVT()) {
17598         // If this is a store followed by a store with the same value to the
17599         // same location, then the store is dead/noop.
17600         return Chain;
17601       }
17602 
17603       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
17604           !ST1->getBasePtr().isUndef() &&
          // BaseIndexOffset and the code below require knowing the size
          // of a vector, so bail out if MemoryVT is scalable.
17607           !ST->getMemoryVT().isScalableVector() &&
17608           !ST1->getMemoryVT().isScalableVector()) {
17609         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
17610         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
17611         unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
17612         unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
        // If the preceding store writes to a subset of the current store's
        // location and no other node is chained to that store, we can
        // effectively drop the preceding store. Do not remove stores to undef
        // as they may be used as data sinks.
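        // For example (sketch): if ST1 is "store i8 %x, p+1" and ST is
        // "store i32 %y, p", the current store overwrites all of ST1's
        // bytes and the preceding i8 store can be dropped.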
17617         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
17618           CombineTo(ST1, ST1->getChain());
17619           return SDValue();
17620         }
17621       }
17622     }
17623   }
17624 
17625   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
17626   // truncating store.  We can do this even if this is already a truncstore.
17627   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
17628       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
17629       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
17630                             ST->getMemoryVT())) {
17631     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
17632                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
17633   }
17634 
17635   // Always perform this optimization before types are legal. If the target
17636   // prefers, also try this after legalization to catch stores that were created
17637   // by intrinsics or other nodes.
17638   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
17639     while (true) {
17640       // There can be multiple store sequences on the same chain.
17641       // Keep trying to merge store sequences until we are unable to do so
17642       // or until we merge the last store on the chain.
17643       bool Changed = mergeConsecutiveStores(ST);
17644       if (!Changed) break;
      // Return N, as the merge only uses CombineTo and no worklist
      // cleanup is necessary.
17647       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
17648         return SDValue(N, 0);
17649     }
17650   }
17651 
17652   // Try transforming N to an indexed store.
17653   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
17654     return SDValue(N, 0);
17655 
17656   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
17657   //
17658   // Make sure to do this only after attempting to merge stores in order to
17659   //  avoid changing the types of some subset of stores due to visit order,
17660   //  preventing their merging.
17661   if (isa<ConstantFPSDNode>(ST->getValue())) {
17662     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
17663       return NewSt;
17664   }
17665 
17666   if (SDValue NewSt = splitMergedValStore(ST))
17667     return NewSt;
17668 
17669   return ReduceLoadOpStoreWidth(N);
17670 }
17671 
17672 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
17673   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
17674   if (!LifetimeEnd->hasOffset())
17675     return SDValue();
17676 
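  // A store that writes entirely within an object's bounds just before the
  // object's lifetime ends can never be observed. Sketch (illustrative):
  // "store i32 %v, %slot" immediately followed on the chain by
  // "LIFETIME_END %slot" is removed below.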
17677   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
17678                                         LifetimeEnd->getOffset(), false);
17679 
17680   // We walk up the chains to find stores.
17681   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
17682   while (!Chains.empty()) {
17683     SDValue Chain = Chains.back();
17684     Chains.pop_back();
17685     if (!Chain.hasOneUse())
17686       continue;
17687     switch (Chain.getOpcode()) {
17688     case ISD::TokenFactor:
17689       for (unsigned Nops = Chain.getNumOperands(); Nops;)
17690         Chains.push_back(Chain.getOperand(--Nops));
17691       break;
17692     case ISD::LIFETIME_START:
17693     case ISD::LIFETIME_END:
17694       // We can forward past any lifetime start/end that can be proven not to
17695       // alias the node.
17696       if (!isAlias(Chain.getNode(), N))
17697         Chains.push_back(Chain.getOperand(0));
17698       break;
17699     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
17701       // TODO: Can relax for unordered atomics (see D66309)
17702       if (!ST->isSimple() || ST->isIndexed())
17703         continue;
17704       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
17705       // The bounds of a scalable store are not known until runtime, so this
17706       // store cannot be elided.
17707       if (StoreSize.isScalable())
17708         continue;
17709       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
17710       // If we store purely within object bounds just before its lifetime ends,
17711       // we can remove the store.
17712       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
17713                                    StoreSize.getFixedSize() * 8)) {
17714         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
17715                    dbgs() << "\nwithin LIFETIME_END of : ";
17716                    LifetimeEndBase.dump(); dbgs() << "\n");
17717         CombineTo(ST, ST->getChain());
17718         return SDValue(N, 0);
17719       }
17720     }
17721     }
17722   }
17723   return SDValue();
17724 }
17725 
/// For the store instruction sequence below, the F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
17729 /// which can remove the bitwise instructions or sink them to colder places.
17730 ///
17731 ///   (store (or (zext (bitcast F to i32) to i64),
17732 ///              (shl (zext I to i64), 32)), addr)  -->
17733 ///   (store F, addr) and (store I, addr+4)
17734 ///
17735 /// Similarly, splitting for other merged store can also be beneficial, like:
17736 /// For pair of {i32, i32}, i64 store --> two i32 stores.
17737 /// For pair of {i32, i16}, i64 store --> two i32 stores.
17738 /// For pair of {i16, i16}, i32 store --> two i16 stores.
17739 /// For pair of {i16, i8},  i32 store --> two i16 stores.
17740 /// For pair of {i8, i8},   i16 store --> two i8 stores.
17741 ///
17742 /// We allow each target to determine specifically which kind of splitting is
17743 /// supported.
17744 ///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo.
17747 ///   void goo(const std::pair<int, float> &);
17748 ///   hoo() {
17749 ///     ...
17750 ///     goo(std::make_pair(tmp, ftmp));
17751 ///     ...
17752 ///   }
17753 ///
17754 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
17755   if (OptLevel == CodeGenOpt::None)
17756     return SDValue();
17757 
17758   // Can't change the number of memory accesses for a volatile store or break
17759   // atomicity for an atomic one.
17760   if (!ST->isSimple())
17761     return SDValue();
17762 
17763   SDValue Val = ST->getValue();
17764   SDLoc DL(ST);
17765 
17766   // Match OR operand.
17767   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
17768     return SDValue();
17769 
17770   // Match SHL operand and get Lower and Higher parts of Val.
17771   SDValue Op1 = Val.getOperand(0);
17772   SDValue Op2 = Val.getOperand(1);
17773   SDValue Lo, Hi;
17774   if (Op1.getOpcode() != ISD::SHL) {
17775     std::swap(Op1, Op2);
17776     if (Op1.getOpcode() != ISD::SHL)
17777       return SDValue();
17778   }
17779   Lo = Op2;
17780   Hi = Op1.getOperand(0);
17781   if (!Op1.hasOneUse())
17782     return SDValue();
17783 
17784   // Match shift amount to HalfValBitSize.
17785   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
17786   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
17787   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
17788     return SDValue();
17789 
  // Lo and Hi are zero-extended from integers whose size is no larger than
  // HalfValBitSize (e.g. i32 or narrower when storing an i64).
17792   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
17793       !Lo.getOperand(0).getValueType().isScalarInteger() ||
17794       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
17795       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
17796       !Hi.getOperand(0).getValueType().isScalarInteger() ||
17797       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
17798     return SDValue();
17799 
  // Use the EVT of the low and high parts before bitcast as the input
  // of the target query.
17802   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
17803                   ? Lo.getOperand(0).getValueType()
17804                   : Lo.getValueType();
17805   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
17806                    ? Hi.getOperand(0).getValueType()
17807                    : Hi.getValueType();
17808   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
17809     return SDValue();
17810 
17811   // Start to split store.
17812   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
17813   AAMDNodes AAInfo = ST->getAAInfo();
17814 
17815   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
17816   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
17817   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
17818   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
17819 
17820   SDValue Chain = ST->getChain();
17821   SDValue Ptr = ST->getBasePtr();
17822   // Lower value store.
17823   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
17824                              ST->getOriginalAlign(), MMOFlags, AAInfo);
17825   Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
17826   // Higher value store.
17827   SDValue St1 = DAG.getStore(
17828       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
17829       ST->getOriginalAlign(), MMOFlags, AAInfo);
17830   return St1;
17831 }
17832 
17833 /// Convert a disguised subvector insertion into a shuffle:
17834 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
17837   SDValue InsertVal = N->getOperand(1);
17838   SDValue Vec = N->getOperand(0);
17839 
17840   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
17841   // InsIndex)
17842   //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
17843   //   CONCAT_VECTORS.
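  // For example (sketch): inserting (extract_vector_elt X, 2) at index 0 of
  // (vector_shuffle X, Y, <4,5,6,7>) can simply rewrite the mask to
  // <2,5,6,7>.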
17844   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
17845       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17846       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
17847     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
17848     ArrayRef<int> Mask = SVN->getMask();
17849 
17850     SDValue X = Vec.getOperand(0);
17851     SDValue Y = Vec.getOperand(1);
17852 
17853     // Vec's operand 0 is using indices from 0 to N-1 and
17854     // operand 1 from N to 2N - 1, where N is the number of
17855     // elements in the vectors.
17856     SDValue InsertVal0 = InsertVal.getOperand(0);
17857     int ElementOffset = -1;
17858 
17859     // We explore the inputs of the shuffle in order to see if we find the
17860     // source of the extract_vector_elt. If so, we can use it to modify the
17861     // shuffle rather than perform an insert_vector_elt.
17862     SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
17863     ArgWorkList.emplace_back(Mask.size(), Y);
17864     ArgWorkList.emplace_back(0, X);
17865 
17866     while (!ArgWorkList.empty()) {
17867       int ArgOffset;
17868       SDValue ArgVal;
17869       std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
17870 
17871       if (ArgVal == InsertVal0) {
17872         ElementOffset = ArgOffset;
17873         break;
17874       }
17875 
17876       // Peek through concat_vector.
17877       if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
17878         int CurrentArgOffset =
17879             ArgOffset + ArgVal.getValueType().getVectorNumElements();
17880         int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
17881         for (SDValue Op : reverse(ArgVal->ops())) {
17882           CurrentArgOffset -= Step;
17883           ArgWorkList.emplace_back(CurrentArgOffset, Op);
17884         }
17885 
17886         // Make sure we went through all the elements and did not screw up index
17887         // computation.
17888         assert(CurrentArgOffset == ArgOffset);
17889       }
17890     }
17891 
17892     if (ElementOffset != -1) {
17893       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
17894 
17895       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
17896       NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue();
17897       assert(NewMask[InsIndex] <
17898                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
17899              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
17900 
17901       SDValue LegalShuffle =
17902               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
17903                                           Y, NewMask, DAG);
17904       if (LegalShuffle)
17905         return LegalShuffle;
17906     }
17907   }
17908 
17909   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
17910   // bitcast(shuffle (bitcast V), (extended X), Mask)
17911   // Note: We do not use an insert_subvector node because that requires a
17912   // legal subvector type.
17913   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
17914       !InsertVal.getOperand(0).getValueType().isVector())
17915     return SDValue();
17916 
17917   SDValue SubVec = InsertVal.getOperand(0);
17918   SDValue DestVec = N->getOperand(0);
17919   EVT SubVecVT = SubVec.getValueType();
17920   EVT VT = DestVec.getValueType();
17921   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating a
  // vector and inserting into it is likely to exceed the cost of an
  // insert_vector_elt.
17924   if (NumSrcElts == 1)
17925     return SDValue();
17926   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
17927   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
17928 
17929   // Step 1: Create a shuffle mask that implements this insert operation. The
17930   // vector that we are inserting into will be operand 0 of the shuffle, so
17931   // those elements are just 'i'. The inserted subvector is in the first
17932   // positions of operand 1 of the shuffle. Example:
17933   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
17934   SmallVector<int, 16> Mask(NumMaskVals);
17935   for (unsigned i = 0; i != NumMaskVals; ++i) {
17936     if (i / NumSrcElts == InsIndex)
17937       Mask[i] = (i % NumSrcElts) + NumMaskVals;
17938     else
17939       Mask[i] = i;
17940   }
17941 
  // Bail out if the target cannot handle the shuffle we want to create.
17943   EVT SubVecEltVT = SubVecVT.getVectorElementType();
17944   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
17945   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
17946     return SDValue();
17947 
17948   // Step 2: Create a wide vector from the inserted source vector by appending
17949   // undefined elements. This is the same size as our destination vector.
17950   SDLoc DL(N);
17951   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
17952   ConcatOps[0] = SubVec;
17953   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
17954 
17955   // Step 3: Shuffle in the padded subvector.
17956   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
17957   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
17958   AddToWorklist(PaddedSubV.getNode());
17959   AddToWorklist(DestVecBC.getNode());
17960   AddToWorklist(Shuf.getNode());
17961   return DAG.getBitcast(VT, Shuf);
17962 }
17963 
17964 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
17965   SDValue InVec = N->getOperand(0);
17966   SDValue InVal = N->getOperand(1);
17967   SDValue EltNo = N->getOperand(2);
17968   SDLoc DL(N);
17969 
17970   EVT VT = InVec.getValueType();
17971   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
17972 
17973   // Insert into out-of-bounds element is undefined.
17974   if (IndexC && VT.isFixedLengthVector() &&
17975       IndexC->getZExtValue() >= VT.getVectorNumElements())
17976     return DAG.getUNDEF(VT);
17977 
17978   // Remove redundant insertions:
17979   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
17980   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17981       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
17982     return InVec;
17983 
17984   if (!IndexC) {
17985     // If this is variable insert to undef vector, it might be better to splat:
17986     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
17987     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
17988       if (VT.isScalableVector())
17989         return DAG.getSplatVector(VT, DL, InVal);
17990       else {
17991         SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal);
17992         return DAG.getBuildVector(VT, DL, Ops);
17993       }
17994     }
17995     return SDValue();
17996   }
17997 
17998   if (VT.isScalableVector())
17999     return SDValue();
18000 
18001   unsigned NumElts = VT.getVectorNumElements();
18002 
18003   // We must know which element is being inserted for folds below here.
18004   unsigned Elt = IndexC->getZExtValue();
18005   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
18006     return Shuf;
18007 
18008   // Canonicalize insert_vector_elt dag nodes.
18009   // Example:
  // (insert_vector_elt (insert_vector_elt A, V0, Idx0), V1, Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, V1, Idx1), V0, Idx0)
18012   //
18013   // Do this only if the child insert_vector node has one use; also
18014   // do this only if indices are both constants and Idx1 < Idx0.
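  // E.g. (insert_vector_elt (insert_vector_elt A, v1, 3), v0, 1)
  //   -> (insert_vector_elt (insert_vector_elt A, v0, 1), v1, 3)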
18015   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
18016       && isa<ConstantSDNode>(InVec.getOperand(2))) {
18017     unsigned OtherElt = InVec.getConstantOperandVal(2);
18018     if (Elt < OtherElt) {
18019       // Swap nodes.
18020       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
18021                                   InVec.getOperand(0), InVal, EltNo);
18022       AddToWorklist(NewOp.getNode());
18023       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
18024                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
18025     }
18026   }
18027 
18028   // If we can't generate a legal BUILD_VECTOR, exit
18029   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
18030     return SDValue();
18031 
18032   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
18033   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
18034   // vector elements.
18035   SmallVector<SDValue, 8> Ops;
18036   // Do not combine these two vectors if the output vector will not replace
18037   // the input vector.
18038   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
18039     Ops.append(InVec.getNode()->op_begin(),
18040                InVec.getNode()->op_end());
18041   } else if (InVec.isUndef()) {
18042     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
18043   } else {
18044     return SDValue();
18045   }
18046   assert(Ops.size() == NumElts && "Unexpected vector size");
18047 
18048   // Insert the element
18049   if (Elt < Ops.size()) {
18050     // All the operands of BUILD_VECTOR must have the same type;
18051     // we enforce that here.
18052     EVT OpVT = Ops[0].getValueType();
18053     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
18054   }
18055 
18056   // Return the new vector
18057   return DAG.getBuildVector(VT, DL, Ops);
18058 }
18059 
18060 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
18061                                                   SDValue EltNo,
18062                                                   LoadSDNode *OriginalLoad) {
18063   assert(OriginalLoad->isSimple());
18064 
18065   EVT ResultVT = EVE->getValueType(0);
18066   EVT VecEltVT = InVecVT.getVectorElementType();
18067 
18068   // If the vector element type is not a multiple of a byte then we are unable
18069   // to correctly compute an address to load only the extracted element as a
18070   // scalar.
18071   if (!VecEltVT.isByteSized())
18072     return SDValue();
18073 
18074   Align Alignment = OriginalLoad->getAlign();
18075   Align NewAlign = DAG.getDataLayout().getABITypeAlign(
18076       VecEltVT.getTypeForEVT(*DAG.getContext()));
18077 
18078   if (NewAlign > Alignment ||
18079       !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
18080     return SDValue();
18081 
18082   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
18083     ISD::NON_EXTLOAD : ISD::EXTLOAD;
18084   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
18085     return SDValue();
18086 
18087   Alignment = NewAlign;
18088 
18089   SDValue NewPtr = OriginalLoad->getBasePtr();
18090   SDValue Offset;
18091   EVT PtrType = NewPtr.getValueType();
18092   MachinePointerInfo MPI;
18093   SDLoc DL(EVE);
18094   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
18095     int Elt = ConstEltNo->getZExtValue();
18096     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
18097     Offset = DAG.getConstant(PtrOff, DL, PtrType);
18098     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
18099   } else {
18100     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
18101     Offset = DAG.getNode(
18102         ISD::MUL, DL, PtrType, Offset,
18103         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
18104     // Discard the pointer info except the address space because the memory
18105     // operand can't represent this new access since the offset is variable.
18106     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
18107   }
18108   NewPtr = DAG.getMemBasePlusOffset(NewPtr, Offset, DL);
18109 
18110   // The replacement we need to do here is a little tricky: we need to
18111   // replace an extractelement of a load with a load.
18112   // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractelement is the only
  // use of the load; that's okay because we don't want to perform this
18115   // transformation in other cases anyway.
18116   SDValue Load;
18117   SDValue Chain;
18118   if (ResultVT.bitsGT(VecEltVT)) {
18119     // If the result type of vextract is wider than the load, then issue an
18120     // extending load instead.
18121     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
18122                                                   VecEltVT)
18123                                    ? ISD::ZEXTLOAD
18124                                    : ISD::EXTLOAD;
18125     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
18126                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
18127                           Alignment, OriginalLoad->getMemOperand()->getFlags(),
18128                           OriginalLoad->getAAInfo());
18129     Chain = Load.getValue(1);
18130   } else {
18131     Load = DAG.getLoad(
18132         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment,
18133         OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo());
18134     Chain = Load.getValue(1);
18135     if (ResultVT.bitsLT(VecEltVT))
18136       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
18137     else
18138       Load = DAG.getBitcast(ResultVT, Load);
18139   }
18140   WorklistRemover DeadNodes(*this);
18141   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
18142   SDValue To[] = { Load, Chain };
18143   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
18144   // Make sure to revisit this node to clean it up; it will usually be dead.
18145   AddToWorklist(EVE);
18146   // Since we're explicitly calling ReplaceAllUses, add the new node to the
18147   // worklist explicitly as well.
18148   AddToWorklistWithUsers(Load.getNode());
18149   ++OpsNarrowed;
18150   return SDValue(EVE, 0);
18151 }
18152 
18153 /// Transform a vector binary operation into a scalar binary operation by moving
18154 /// the math/logic after an extract element of a vector.
18155 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
18156                                        bool LegalOperations) {
18157   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18158   SDValue Vec = ExtElt->getOperand(0);
18159   SDValue Index = ExtElt->getOperand(1);
18160   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18161   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
18162       Vec.getNode()->getNumValues() != 1)
18163     return SDValue();
18164 
18165   // Targets may want to avoid this to prevent an expensive register transfer.
18166   if (!TLI.shouldScalarizeBinop(Vec))
18167     return SDValue();
18168 
18169   // Extracting an element of a vector constant is constant-folded, so this
18170   // transform is just replacing a vector op with a scalar op while moving the
18171   // extract.
18172   SDValue Op0 = Vec.getOperand(0);
18173   SDValue Op1 = Vec.getOperand(1);
18174   if (isAnyConstantBuildVector(Op0, true) ||
18175       isAnyConstantBuildVector(Op1, true)) {
18176     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
18177     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
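    // E.g. extractelt (add X, <1,2,3,4>), 1 --> add (extractelt X, 1), 2,
    // where the extract of the constant operand constant-folds to 2.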
18178     SDLoc DL(ExtElt);
18179     EVT VT = ExtElt->getValueType(0);
18180     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
18181     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
18182     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
18183   }
18184 
18185   return SDValue();
18186 }
18187 
18188 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
18189   SDValue VecOp = N->getOperand(0);
18190   SDValue Index = N->getOperand(1);
18191   EVT ScalarVT = N->getValueType(0);
18192   EVT VecVT = VecOp.getValueType();
18193   if (VecOp.isUndef())
18194     return DAG.getUNDEF(ScalarVT);
18195 
  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
18197   //
18198   // This only really matters if the index is non-constant since other combines
18199   // on the constant elements already work.
18200   SDLoc DL(N);
18201   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
18202       Index == VecOp.getOperand(2)) {
18203     SDValue Elt = VecOp.getOperand(1);
18204     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
18205   }
18206 
  // (vextract (scalar_to_vector val), 0) -> val
18208   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18209     // Only 0'th element of SCALAR_TO_VECTOR is defined.
18210     if (DAG.isKnownNeverZero(Index))
18211       return DAG.getUNDEF(ScalarVT);
18212 
18213     // Check if the result type doesn't match the inserted element type. A
18214     // SCALAR_TO_VECTOR may truncate the inserted element and the
18215     // EXTRACT_VECTOR_ELT may widen the extracted vector.
18216     SDValue InOp = VecOp.getOperand(0);
18217     if (InOp.getValueType() != ScalarVT) {
18218       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18219       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18220     }
18221     return InOp;
18222   }
18223 
18224   // extract_vector_elt of out-of-bounds element -> UNDEF
18225   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18226   if (IndexC && VecVT.isFixedLengthVector() &&
18227       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
18228     return DAG.getUNDEF(ScalarVT);
18229 
18230   // extract_vector_elt (build_vector x, y), 1 -> y
18231   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
18232        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
18233       TLI.isTypeLegal(VecVT) &&
18234       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
18235     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
18236             VecVT.isFixedLengthVector()) &&
18237            "BUILD_VECTOR used for scalable vectors");
18238     unsigned IndexVal =
18239         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
18240     SDValue Elt = VecOp.getOperand(IndexVal);
18241     EVT InEltVT = Elt.getValueType();
18242 
18243     // Sometimes build_vector's scalar input types do not match result type.
18244     if (ScalarVT == InEltVT)
18245       return Elt;
18246 
18247     // TODO: It may be useful to truncate if free if the build_vector implicitly
18248     // converts.
18249   }
18250 
18251   if (VecVT.isScalableVector())
18252     return SDValue();
18253 
18254   // All the code from this point onwards assumes fixed width vectors, but it's
18255   // possible that some of the combinations could be made to work for scalable
18256   // vectors too.
18257   unsigned NumElts = VecVT.getVectorNumElements();
18258   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
18259 
18260   // TODO: These transforms should not require the 'hasOneUse' restriction, but
18261   // there are regressions on multiple targets without it. We can end up with a
18262   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
18263   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
18264       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
18266     bool IsLE = DAG.getDataLayout().isLittleEndian();
18267     unsigned ExtractIndex = IndexC->getZExtValue();
18268     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
18269     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
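    // E.g. for (v2i32 (bitcast i64:x)), element 0 holds bits [31:0] of x on a
    // little-endian target, so extracting element 0 is a plain truncate; on a
    // big-endian target the low bits live in element NumElts - 1.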
18270     SDValue BCSrc = VecOp.getOperand(0);
18271     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
18272       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
18273 
18274     if (LegalTypes && BCSrc.getValueType().isInteger() &&
18275         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18276       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
18277       // trunc i64 X to i32
18278       SDValue X = BCSrc.getOperand(0);
18279       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
18280              "Extract element and scalar to vector can't change element type "
18281              "from FP to integer.");
18282       unsigned XBitWidth = X.getValueSizeInBits();
18283       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
18284 
18285       // An extract element return value type can be wider than its vector
18286       // operand element type. In that case, the high bits are undefined, so
18287       // it's possible that we may need to extend rather than truncate.
18288       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
18289         assert(XBitWidth % VecEltBitWidth == 0 &&
18290                "Scalar bitwidth must be a multiple of vector element bitwidth");
18291         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
18292       }
18293     }
18294   }
18295 
18296   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
18297     return BO;
18298 
  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns (for example, on AVX, extracting elements from a wide vector
  // without using extract_subvector). However, if we can find an underlying
  // scalar value, then we can always use that.
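  // E.g. (extract_vector_elt (vector_shuffle<2,u,u,u> X, Y), 0)
  //   --> (extract_vector_elt X, 2)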
18305   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
18306     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
18307     // Find the new index to extract from.
18308     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
18309 
18310     // Extracting an undef index is undef.
18311     if (OrigElt == -1)
18312       return DAG.getUNDEF(ScalarVT);
18313 
18314     // Select the right vector half to extract from.
18315     SDValue SVInVec;
18316     if (OrigElt < (int)NumElts) {
18317       SVInVec = VecOp.getOperand(0);
18318     } else {
18319       SVInVec = VecOp.getOperand(1);
18320       OrigElt -= NumElts;
18321     }
18322 
18323     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
18324       SDValue InOp = SVInVec.getOperand(OrigElt);
18325       if (InOp.getValueType() != ScalarVT) {
18326         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
18327         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
18328       }
18329 
18330       return InOp;
18331     }
18332 
18333     // FIXME: We should handle recursing on other vector shuffles and
18334     // scalar_to_vector here as well.
18335 
18336     if (!LegalOperations ||
18337         // FIXME: Should really be just isOperationLegalOrCustom.
18338         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
18339         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
18340       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
18341                          DAG.getVectorIdxConstant(OrigElt, DL));
18342     }
18343   }
18344 
18345   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
18346   // simplify it based on the (valid) extraction indices.
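  // E.g. if only lanes 0 and 2 of a v4i32 are ever extracted, DemandedElts is
  // 0b0101 and the remaining lanes may be simplified away.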
18347   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
18348         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
18349                Use->getOperand(0) == VecOp &&
18350                isa<ConstantSDNode>(Use->getOperand(1));
18351       })) {
18352     APInt DemandedElts = APInt::getNullValue(NumElts);
18353     for (SDNode *Use : VecOp->uses()) {
18354       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
18355       if (CstElt->getAPIntValue().ult(NumElts))
18356         DemandedElts.setBit(CstElt->getZExtValue());
18357     }
18358     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
18359       // We simplified the vector operand of this extract element. If this
18360       // extract is not dead, visit it again so it is folded properly.
18361       if (N->getOpcode() != ISD::DELETED_NODE)
18362         AddToWorklist(N);
18363       return SDValue(N, 0);
18364     }
18365     APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
18366     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
18367       // We simplified the vector operand of this extract element. If this
18368       // extract is not dead, visit it again so it is folded properly.
18369       if (N->getOpcode() != ISD::DELETED_NODE)
18370         AddToWorklist(N);
18371       return SDValue(N, 0);
18372     }
18373   }
18374 
18375   // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
  // profitable.
18378   bool BCNumEltsChanged = false;
18379   EVT ExtVT = VecVT.getVectorElementType();
18380   EVT LVT = ExtVT;
18381   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
18382     return SDValue();
18383 
18384   if (VecOp.getOpcode() == ISD::BITCAST) {
18385     // Don't duplicate a load with other uses.
18386     if (!VecOp.hasOneUse())
18387       return SDValue();
18388 
18389     EVT BCVT = VecOp.getOperand(0).getValueType();
18390     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
18391       return SDValue();
18392     if (NumElts != BCVT.getVectorNumElements())
18393       BCNumEltsChanged = true;
18394     VecOp = VecOp.getOperand(0);
18395     ExtVT = BCVT.getVectorElementType();
18396   }
18397 
18398   // extract (vector load $addr), i --> load $addr + i * size
18399   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
18400       ISD::isNormalLoad(VecOp.getNode()) &&
18401       !Index->hasPredecessor(VecOp.getNode())) {
18402     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
18403     if (VecLoad && VecLoad->isSimple())
18404       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
18405   }
18406 
18407   // Perform only after legalization to ensure build_vector / vector_shuffle
18408   // optimizations have already been done.
18409   if (!LegalOperations || !IndexC)
18410     return SDValue();
18411 
18412   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
18413   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
18414   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
18415   int Elt = IndexC->getZExtValue();
18416   LoadSDNode *LN0 = nullptr;
18417   if (ISD::isNormalLoad(VecOp.getNode())) {
18418     LN0 = cast<LoadSDNode>(VecOp);
18419   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
18420              VecOp.getOperand(0).getValueType() == ExtVT &&
18421              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
18422     // Don't duplicate a load with other uses.
18423     if (!VecOp.hasOneUse())
18424       return SDValue();
18425 
18426     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
18427   }
18428   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
18429     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
18430     // =>
18431     // (load $addr+1*size)
18432 
18433     // Don't duplicate a load with other uses.
18434     if (!VecOp.hasOneUse())
18435       return SDValue();
18436 
18437     // If the bit convert changed the number of elements, it is unsafe
18438     // to examine the mask.
18439     if (BCNumEltsChanged)
18440       return SDValue();
18441 
    // Select the input vector, guarding against an out-of-range extract index.
18443     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
18444     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
18445 
18446     if (VecOp.getOpcode() == ISD::BITCAST) {
18447       // Don't duplicate a load with other uses.
18448       if (!VecOp.hasOneUse())
18449         return SDValue();
18450 
18451       VecOp = VecOp.getOperand(0);
18452     }
18453     if (ISD::isNormalLoad(VecOp.getNode())) {
18454       LN0 = cast<LoadSDNode>(VecOp);
18455       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
18456       Index = DAG.getConstant(Elt, DL, Index.getValueType());
18457     }
18458   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
18459              VecVT.getVectorElementType() == ScalarVT &&
18460              (!LegalTypes ||
18461               TLI.isTypeLegal(
18462                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
18463     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
18464     //      -> extract_vector_elt a, 0
18465     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
18466     //      -> extract_vector_elt a, 1
18467     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
18468     //      -> extract_vector_elt b, 0
18469     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
18470     //      -> extract_vector_elt b, 1
18471     SDLoc SL(N);
18472     EVT ConcatVT = VecOp.getOperand(0).getValueType();
18473     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
18474     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
18475                                      Index.getValueType());
18476 
18477     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
18478     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
18479                               ConcatVT.getVectorElementType(),
18480                               ConcatOp, NewIdx);
18481     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
18482   }
18483 
  // Make sure we found a simple (non-volatile, non-atomic) load and that the
  // extractelement is the only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
18487     return SDValue();
18488 
18489   // If Idx was -1 above, Elt is going to be -1, so just return undef.
18490   if (Elt == -1)
18491     return DAG.getUNDEF(LVT);
18492 
18493   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
18494 }
18495 
18496 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
18497 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
18498   // We perform this optimization post type-legalization because
18499   // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization earlier may create bit-casts which
  // will be type-legalized to complex code sequences.
18502   // We perform this optimization only before the operation legalizer because we
18503   // may introduce illegal operations.
18504   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
18505     return SDValue();
18506 
18507   unsigned NumInScalars = N->getNumOperands();
18508   SDLoc DL(N);
18509   EVT VT = N->getValueType(0);
18510 
18511   // Check to see if this is a BUILD_VECTOR of a bunch of values
18512   // which come from any_extend or zero_extend nodes. If so, we can create
18513   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
18514   // optimizations. We do not handle sign-extend because we can't fill the sign
18515   // using shuffles.
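  // E.g. on a little-endian target,
  //   (v2i16 (build_vector (zext i8:a to i16), (zext i8:b to i16)))
  //     --> (v2i16 (bitcast (v4i8 (build_vector a, 0, b, 0))))
  // where the zero fillers come from the zero_extend semantics.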
18516   EVT SourceType = MVT::Other;
18517   bool AllAnyExt = true;
18518 
18519   for (unsigned i = 0; i != NumInScalars; ++i) {
18520     SDValue In = N->getOperand(i);
18521     // Ignore undef inputs.
18522     if (In.isUndef()) continue;
18523 
18524     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
18525     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
18526 
18527     // Abort if the element is not an extension.
18528     if (!ZeroExt && !AnyExt) {
18529       SourceType = MVT::Other;
18530       break;
18531     }
18532 
18533     // The input is a ZeroExt or AnyExt. Check the original type.
18534     EVT InTy = In.getOperand(0).getValueType();
18535 
18536     // Check that all of the widened source types are the same.
18537     if (SourceType == MVT::Other)
18538       // First time.
18539       SourceType = InTy;
18540     else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
18542       SourceType = MVT::Other;
18543       break;
18544     }
18545 
18546     // Check if all of the extends are ANY_EXTENDs.
18547     AllAnyExt &= AnyExt;
18548   }
18549 
18550   // In order to have valid types, all of the inputs must be extended from the
18551   // same source type and all of the inputs must be any or zero extend.
18552   // Scalar sizes must be a power of two.
18553   EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());
18557 
18558   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
18559   // turn into a single shuffle instruction.
18560   if (!ValidTypes)
18561     return SDValue();
18562 
18563   // If we already have a splat buildvector, then don't fold it if it means
18564   // introducing zeros.
18565   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
18566     return SDValue();
18567 
18568   bool isLE = DAG.getDataLayout().isLittleEndian();
18569   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
18570   assert(ElemRatio > 1 && "Invalid element size ratio");
18571   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
18572                                DAG.getConstant(0, DL, SourceType);
18573 
18574   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
18575   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
18576 
18577   // Populate the new build_vector
18578   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18579     SDValue Cast = N->getOperand(i);
18580     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
18581             Cast.getOpcode() == ISD::ZERO_EXTEND ||
18582             Cast.isUndef()) && "Invalid cast opcode");
18583     SDValue In;
18584     if (Cast.isUndef())
18585       In = DAG.getUNDEF(SourceType);
18586     else
18587       In = Cast->getOperand(0);
18588     unsigned Index = isLE ? (i * ElemRatio) :
18589                             (i * ElemRatio + (ElemRatio - 1));
18590 
18591     assert(Index < Ops.size() && "Invalid index");
18592     Ops[Index] = In;
18593   }
18594 
18595   // The type of the new BUILD_VECTOR node.
18596   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
18597   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
18598          "Invalid vector size");
18599   // Check if the new vector type is legal.
18600   if (!isTypeLegal(VecVT) ||
18601       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
18602        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
18603     return SDValue();
18604 
18605   // Make the new BUILD_VECTOR.
18606   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
18607 
18608   // The new BUILD_VECTOR node has the potential to be further optimized.
18609   AddToWorklist(BV.getNode());
18610   // Bitcast to the desired type.
18611   return DAG.getBitcast(VT, BV);
18612 }
18613 
18614 // Simplify (build_vec (trunc $1)
18615 //                     (trunc (srl $1 half-width))
18616 //                     (trunc (srl $1 (2 * half-width))) …)
18617 // to (bitcast $1)
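// E.g. with an i64 source on a little-endian target:
//   (v2i32 (build_vector (trunc $1), (trunc (srl $1, 32))))
//     --> (v2i32 (bitcast $1))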
18618 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
18619   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18620 
  // Only for little-endian targets.
18622   if (!DAG.getDataLayout().isLittleEndian())
18623     return SDValue();
18624 
18625   SDLoc DL(N);
18626   EVT VT = N->getValueType(0);
18627   EVT OutScalarTy = VT.getScalarType();
18628   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
18629 
  // Only for power-of-two types, to be sure that the bitcast works well.
18631   if (!isPowerOf2_64(ScalarTypeBitsize))
18632     return SDValue();
18633 
18634   unsigned NumInScalars = N->getNumOperands();
18635 
18636   // Look through bitcasts
18637   auto PeekThroughBitcast = [](SDValue Op) {
18638     if (Op.getOpcode() == ISD::BITCAST)
18639       return Op.getOperand(0);
18640     return Op;
18641   };
18642 
18643   // The source value where all the parts are extracted.
18644   SDValue Src;
18645   for (unsigned i = 0; i != NumInScalars; ++i) {
18646     SDValue In = PeekThroughBitcast(N->getOperand(i));
18647     // Ignore undef inputs.
18648     if (In.isUndef()) continue;
18649 
18650     if (In.getOpcode() != ISD::TRUNCATE)
18651       return SDValue();
18652 
18653     In = PeekThroughBitcast(In.getOperand(0));
18654 
18655     if (In.getOpcode() != ISD::SRL) {
      // For now only handle build_vectors without shuffling: the unshifted
      // source must be element 0. Handle shuffled orderings here in the
      // future.
18658       if (i != 0)
18659         return SDValue();
18660 
18661       Src = In;
18662     } else {
18663       // In is SRL
18664       SDValue part = PeekThroughBitcast(In.getOperand(0));
18665 
18666       if (!Src) {
18667         Src = part;
18668       } else if (Src != part) {
18669         // Vector parts do not stem from the same variable
18670         return SDValue();
18671       }
18672 
18673       SDValue ShiftAmtVal = In.getOperand(1);
18674       if (!isa<ConstantSDNode>(ShiftAmtVal))
18675         return SDValue();
18676 
18677       uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1);
18678 
18679       // The extracted value is not extracted at the right position
18680       if (ShiftAmt != i * ScalarTypeBitsize)
18681         return SDValue();
18682     }
18683   }
18684 
18685   // Only cast if the size is the same
18686   if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
18687     return SDValue();
18688 
18689   return DAG.getBitcast(VT, Src);
18690 }
18691 
18692 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
18693                                            ArrayRef<int> VectorMask,
18694                                            SDValue VecIn1, SDValue VecIn2,
18695                                            unsigned LeftIdx, bool DidSplitVec) {
18696   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
18697 
18698   EVT VT = N->getValueType(0);
18699   EVT InVT1 = VecIn1.getValueType();
18700   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
18701 
18702   unsigned NumElems = VT.getVectorNumElements();
18703   unsigned ShuffleNumElems = NumElems;
18704 
18705   // If we artificially split a vector in two already, then the offsets in the
18706   // operands will all be based off of VecIn1, even those in VecIn2.
18707   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
18708 
18709   uint64_t VTSize = VT.getFixedSizeInBits();
18710   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
18711   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
18712 
18713   // We can't generate a shuffle node with mismatched input and output types.
18714   // Try to make the types match the type of the output.
18715   if (InVT1 != VT || InVT2 != VT) {
18716     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
18717       // If the output vector length is a multiple of both input lengths,
18718       // we can concatenate them and pad the rest with undefs.
18719       unsigned NumConcats = VTSize / InVT1Size;
18720       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
18721       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
18722       ConcatOps[0] = VecIn1;
18723       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
18724       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18725       VecIn2 = SDValue();
18726     } else if (InVT1Size == VTSize * 2) {
18727       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
18728         return SDValue();
18729 
18730       if (!VecIn2.getNode()) {
18731         // If we only have one input vector, and it's twice the size of the
18732         // output, split it in two.
18733         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
18734                              DAG.getVectorIdxConstant(NumElems, DL));
18735         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
18736         // Since we now have shorter input vectors, adjust the offset of the
18737         // second vector's start.
18738         Vec2Offset = NumElems;
18739       } else if (InVT2Size <= InVT1Size) {
18740         // VecIn1 is wider than the output, and we have another, possibly
18741         // smaller input. Pad the smaller input with undefs, shuffle at the
18742         // input vector width, and extract the output.
18743         // The shuffle type is different than VT, so check legality again.
18744         if (LegalOperations &&
18745             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
18746           return SDValue();
18747 
18748         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
18749         // lower it back into a BUILD_VECTOR. So if the inserted type is
18750         // illegal, don't even try.
18751         if (InVT1 != InVT2) {
18752           if (!TLI.isTypeLegal(InVT2))
18753             return SDValue();
18754           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
18755                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
18756         }
18757         ShuffleNumElems = NumElems * 2;
18758       } else {
18759         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
18760         // than VecIn1. We can't handle this for now - this case will disappear
18761         // when we start sorting the vectors by type.
18762         return SDValue();
18763       }
18764     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
18765       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
18766       ConcatOps[0] = VecIn2;
18767       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
18768     } else {
18769       // TODO: Support cases where the length mismatch isn't exactly by a
18770       // factor of 2.
18771       // TODO: Move this check upwards, so that if we have bad type
18772       // mismatches, we don't create any DAG nodes.
18773       return SDValue();
18774     }
18775   }
18776 
18777   // Initialize mask to undef.
18778   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
18779 
18780   // Only need to run up to the number of elements actually used, not the
18781   // total number of elements in the shuffle - if we are shuffling a wider
18782   // vector, the high lanes should be set to undef.
18783   for (unsigned i = 0; i != NumElems; ++i) {
18784     if (VectorMask[i] <= 0)
18785       continue;
18786 
18787     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
18788     if (VectorMask[i] == (int)LeftIdx) {
18789       Mask[i] = ExtIndex;
18790     } else if (VectorMask[i] == (int)LeftIdx + 1) {
18791       Mask[i] = Vec2Offset + ExtIndex;
18792     }
18793   }
18794 
18795   // The type the input vectors may have changed above.
18796   InVT1 = VecIn1.getValueType();
18797 
18798   // If we already have a VecIn2, it should have the same type as VecIn1.
18799   // If we don't, get an undef/zero vector of the appropriate type.
18800   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
18801   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
18802 
18803   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
18804   if (ShuffleNumElems > NumElems)
18805     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
18806 
18807   return Shuffle;
18808 }
18809 
18810 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
18811   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
18812 
18813   // First, determine where the build vector is not undef.
18814   // TODO: We could extend this to handle zero elements as well as undefs.
18815   int NumBVOps = BV->getNumOperands();
18816   int ZextElt = -1;
18817   for (int i = 0; i != NumBVOps; ++i) {
18818     SDValue Op = BV->getOperand(i);
18819     if (Op.isUndef())
18820       continue;
18821     if (ZextElt == -1)
18822       ZextElt = i;
18823     else
18824       return SDValue();
18825   }
18826   // Bail out if there's no non-undef element.
18827   if (ZextElt == -1)
18828     return SDValue();
18829 
18830   // The build vector contains some number of undef elements and exactly
18831   // one other element. That other element must be a zero-extended scalar
18832   // extracted from a vector at a constant index to turn this into a shuffle.
18833   // Also, require that the build vector does not implicitly truncate/extend
18834   // its elements.
18835   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
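  // E.g. for (v4i32 (build_vector undef, (zext (extractelt v8i16:V, 3)),
  // undef, undef)), ZextRatio == 2 and the v8i16 shuffle mask becomes
  // <u,u,3,8,u,u,u,u>, where 8 selects element 0 of the zero vector.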
18836   EVT VT = BV->getValueType(0);
18837   SDValue Zext = BV->getOperand(ZextElt);
18838   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
18839       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18840       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
18841       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
18842     return SDValue();
18843 
18844   // The zero-extend must be a multiple of the source size, and we must be
18845   // building a vector of the same size as the source of the extract element.
18846   SDValue Extract = Zext.getOperand(0);
18847   unsigned DestSize = Zext.getValueSizeInBits();
18848   unsigned SrcSize = Extract.getValueSizeInBits();
18849   if (DestSize % SrcSize != 0 ||
18850       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
18851     return SDValue();
18852 
18853   // Create a shuffle mask that will combine the extracted element with zeros
18854   // and undefs.
18855   int ZextRatio = DestSize / SrcSize;
18856   int NumMaskElts = NumBVOps * ZextRatio;
18857   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
18858   for (int i = 0; i != NumMaskElts; ++i) {
18859     if (i / ZextRatio == ZextElt) {
18860       // The low bits of the (potentially translated) extracted element map to
18861       // the source vector. The high bits map to zero. We will use a zero vector
18862       // as the 2nd source operand of the shuffle, so use the 1st element of
18863       // that vector (mask value is number-of-elements) for the high bits.
18864       if (i % ZextRatio == 0)
18865         ShufMask[i] = Extract.getConstantOperandVal(1);
18866       else
18867         ShufMask[i] = NumMaskElts;
18868     }
18869 
18870     // Undef elements of the build vector remain undef because we initialize
18871     // the shuffle mask with -1.
18872   }
18873 
18874   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
18875   // bitcast (shuffle V, ZeroVec, VectorMask)
18876   SDLoc DL(BV);
18877   EVT VecVT = Extract.getOperand(0).getValueType();
18878   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
18879   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18880   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
18881                                              ZeroVec, ShufMask, DAG);
18882   if (!Shuf)
18883     return SDValue();
18884   return DAG.getBitcast(VT, Shuf);
18885 }
18886 
18887 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
18888 // operations. If the types of the vectors we're extracting from allow it,
18889 // turn this into a vector_shuffle node.
18890 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
18891   SDLoc DL(N);
18892   EVT VT = N->getValueType(0);
18893 
18894   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
18895   if (!isTypeLegal(VT))
18896     return SDValue();
18897 
18898   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
18899     return V;
18900 
18901   // May only combine to shuffle after legalize if shuffle is legal.
18902   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
18903     return SDValue();
18904 
18905   bool UsesZeroVector = false;
18906   unsigned NumElems = N->getNumOperands();
18907 
18908   // Record, for each element of the newly built vector, which input vector
18909   // that element comes from. -1 stands for undef, 0 for the zero vector,
18910   // and positive values for the input vectors.
18911   // VectorMask maps each element to its vector number, and VecIn maps vector
18912   // numbers to their initial SDValues.
18913 
18914   SmallVector<int, 8> VectorMask(NumElems, -1);
18915   SmallVector<SDValue, 8> VecIn;
18916   VecIn.push_back(SDValue());
18917 
18918   for (unsigned i = 0; i != NumElems; ++i) {
18919     SDValue Op = N->getOperand(i);
18920 
18921     if (Op.isUndef())
18922       continue;
18923 
18924     // See if we can use a blend with a zero vector.
18925     // TODO: Should we generalize this to a blend with an arbitrary constant
18926     // vector?
18927     if (isNullConstant(Op) || isNullFPConstant(Op)) {
18928       UsesZeroVector = true;
18929       VectorMask[i] = 0;
18930       continue;
18931     }
18932 
18933     // Not an undef or zero. If the input is something other than an
18934     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
18935     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
18936         !isa<ConstantSDNode>(Op.getOperand(1)))
18937       return SDValue();
18938     SDValue ExtractedFromVec = Op.getOperand(0);
18939 
18940     if (ExtractedFromVec.getValueType().isScalableVector())
18941       return SDValue();
18942 
18943     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
18944     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
18945       return SDValue();
18946 
18947     // All inputs must have the same element type as the output.
18948     if (VT.getVectorElementType() !=
18949         ExtractedFromVec.getValueType().getVectorElementType())
18950       return SDValue();
18951 
18952     // Have we seen this input vector before?
18953     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
18954     // a map back from SDValues to numbers isn't worth it.
18955     unsigned Idx = std::distance(
18956         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
18957     if (Idx == VecIn.size())
18958       VecIn.push_back(ExtractedFromVec);
18959 
18960     VectorMask[i] = Idx;
18961   }
18962 
18963   // If we didn't find at least one input vector, bail out.
18964   if (VecIn.size() < 2)
18965     return SDValue();
18966 
  // If all the operands of the BUILD_VECTOR extract from the same
  // vector, then split the vector efficiently based on the maximum
  // vector access index and adjust the VectorMask and
  // VecIn accordingly.
18971   bool DidSplitVec = false;
18972   if (VecIn.size() == 2) {
18973     unsigned MaxIndex = 0;
18974     unsigned NearestPow2 = 0;
18975     SDValue Vec = VecIn.back();
18976     EVT InVT = Vec.getValueType();
18977     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
18978 
18979     for (unsigned i = 0; i < NumElems; i++) {
18980       if (VectorMask[i] <= 0)
18981         continue;
18982       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
18983       IndexVec[i] = Index;
18984       MaxIndex = std::max(MaxIndex, Index);
18985     }
18986 
18987     NearestPow2 = PowerOf2Ceil(MaxIndex);
18988     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
18989         NumElems * 2 < NearestPow2) {
18990       unsigned SplitSize = NearestPow2 / 2;
18991       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
18992                                      InVT.getVectorElementType(), SplitSize);
18993       if (TLI.isTypeLegal(SplitVT)) {
18994         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18995                                      DAG.getVectorIdxConstant(SplitSize, DL));
18996         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
18997                                      DAG.getVectorIdxConstant(0, DL));
18998         VecIn.pop_back();
18999         VecIn.push_back(VecIn1);
19000         VecIn.push_back(VecIn2);
19001         DidSplitVec = true;
19002 
19003         for (unsigned i = 0; i < NumElems; i++) {
19004           if (VectorMask[i] <= 0)
19005             continue;
19006           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
19007         }
19008       }
19009     }
19010   }
19011 
19012   // TODO: We want to sort the vectors by descending length, so that adjacent
19013   // pairs have similar length, and the longer vector is always first in the
19014   // pair.
19015 
19016   // TODO: Should this fire if some of the input vectors has illegal type (like
19017   // it does now), or should we let legalization run its course first?
19018 
19019   // Shuffle phase:
19020   // Take pairs of vectors, and shuffle them so that the result has elements
19021   // from these vectors in the correct places.
19022   // For example, given:
19023   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
19024   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
19025   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
19026   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
19027   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
19028   // We will generate:
19029   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
19030   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
19031   SmallVector<SDValue, 4> Shuffles;
19032   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
19033     unsigned LeftIdx = 2 * In + 1;
19034     SDValue VecLeft = VecIn[LeftIdx];
19035     SDValue VecRight =
19036         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
19037 
19038     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
19039                                                 VecRight, LeftIdx, DidSplitVec))
19040       Shuffles.push_back(Shuffle);
19041     else
19042       return SDValue();
19043   }
19044 
19045   // If we need the zero vector as an "ingredient" in the blend tree, add it
19046   // to the list of shuffles.
19047   if (UsesZeroVector)
19048     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
19049                                       : DAG.getConstantFP(0.0, DL, VT));
19050 
19051   // If we only have one shuffle, we're done.
19052   if (Shuffles.size() == 1)
19053     return Shuffles[0];
19054 
19055   // Update the vector mask to point to the post-shuffle vectors.
19056   for (int &Vec : VectorMask)
19057     if (Vec == 0)
19058       Vec = Shuffles.size() - 1;
19059     else
19060       Vec = (Vec - 1) / 2;
19061 
19062   // More than one shuffle. Generate a binary tree of blends, e.g. if from
19063   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
19064   // generate:
19065   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
19066   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
19067   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
19068   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
19069   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
19070   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
19071   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
19072 
19073   // Make sure the initial size of the shuffle list is even.
19074   if (Shuffles.size() % 2)
19075     Shuffles.push_back(DAG.getUNDEF(VT));
19076 
19077   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
19078     if (CurSize % 2) {
19079       Shuffles[CurSize] = DAG.getUNDEF(VT);
19080       CurSize++;
19081     }
19082     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
19083       int Left = 2 * In;
19084       int Right = 2 * In + 1;
19085       SmallVector<int, 8> Mask(NumElems, -1);
19086       for (unsigned i = 0; i != NumElems; ++i) {
19087         if (VectorMask[i] == Left) {
19088           Mask[i] = i;
19089           VectorMask[i] = In;
19090         } else if (VectorMask[i] == Right) {
19091           Mask[i] = i + NumElems;
19092           VectorMask[i] = In;
19093         }
19094       }
19095 
19096       Shuffles[In] =
19097           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
19098     }
19099   }
19100   return Shuffles[0];
19101 }
19102 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
19105 // TODO: Support sign extend?
19106 // TODO: Allow undef elements?
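// E.g. (v4i32 (build_vector (zext (extractelt v8i16:X, 4)),
//                           (zext (extractelt X, 5)),
//                           (zext (extractelt X, 6)),
//                           (zext (extractelt X, 7))))
//   --> (v4i32 (zero_extend (v4i16 (extract_subvector X, 4))))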
19107 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
19108   if (LegalOperations)
19109     return SDValue();
19110 
19111   EVT VT = N->getValueType(0);
19112 
19113   bool FoundZeroExtend = false;
19114   SDValue Op0 = N->getOperand(0);
19115   auto checkElem = [&](SDValue Op) -> int64_t {
19116     unsigned Opc = Op.getOpcode();
19117     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
19118     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
19119         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
19120         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
19121       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
19122         return C->getZExtValue();
19123     return -1;
19124   };
19125 
19126   // Make sure the first element matches
19127   // (zext (extract_vector_elt X, C))
19128   int64_t Offset = checkElem(Op0);
19129   if (Offset < 0)
19130     return SDValue();
19131 
19132   unsigned NumElems = N->getNumOperands();
19133   SDValue In = Op0.getOperand(0).getOperand(0);
19134   EVT InSVT = In.getValueType().getScalarType();
19135   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
19136 
19137   // Don't create an illegal input type after type legalization.
19138   if (LegalTypes && !TLI.isTypeLegal(InVT))
19139     return SDValue();
19140 
19141   // Ensure all the elements come from the same vector and are adjacent.
19142   for (unsigned i = 1; i != NumElems; ++i) {
19143     if ((Offset + i) != checkElem(N->getOperand(i)))
19144       return SDValue();
19145   }
19146 
19147   SDLoc DL(N);
19148   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
19149                    Op0.getOperand(0).getOperand(1));
19150   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
19151                      VT, In);
19152 }
19153 
19154 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
19155   EVT VT = N->getValueType(0);
19156 
19157   // A vector built entirely of undefs is undef.
19158   if (ISD::allOperandsUndef(N))
19159     return DAG.getUNDEF(VT);
19160 
19161   // If this is a splat of a bitcast from another vector, change to a
19162   // concat_vector.
19163   // For example:
19164   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
19165   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
19166   //
19167   // If X is a build_vector itself, the concat can become a larger build_vector.
19168   // TODO: Maybe this is useful for non-splat too?
19169   if (!LegalOperations) {
19170     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19171       Splat = peekThroughBitcasts(Splat);
19172       EVT SrcVT = Splat.getValueType();
19173       if (SrcVT.isVector()) {
19174         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
19175         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
19176                                      SrcVT.getVectorElementType(), NumElts);
19177         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
19178           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
19179           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
19180                                        NewVT, Ops);
19181           return DAG.getBitcast(VT, Concat);
19182         }
19183       }
19184     }
19185   }
19186 
19187   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
19188   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
19189     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
19190       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
19191       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
19192     }
19193 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
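  // For example:
  //   (v2i64 build_vector (extract_vector_elt (v4i64 X), 2),
  //                       (extract_vector_elt (v4i64 X), 3))
  //     --> (v2i64 extract_subvector (v4i64 X), 2)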
19195   if (!LegalTypes && (N->getNumOperands() > 1)) {
19196     SDValue Op0 = N->getOperand(0);
    auto checkElem = [&](SDValue Op) -> int64_t {
19198       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
19199           (Op0.getOperand(0) == Op.getOperand(0)))
19200         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
19201           return CNode->getZExtValue();
19202       return -1;
19203     };
19204 
    int64_t Offset = checkElem(Op0);
19206     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
19207       if (Offset + i != checkElem(N->getOperand(i))) {
19208         Offset = -1;
19209         break;
19210       }
19211     }
19212 
19213     if ((Offset == 0) &&
19214         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
19215       return Op0.getOperand(0);
19216     if ((Offset != -1) &&
19217         ((Offset % N->getValueType(0).getVectorNumElements()) ==
         0)) // The index must be a multiple of the result's element count.
19219       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
19220                          Op0.getOperand(0), Op0.getOperand(1));
19221   }
19222 
19223   if (SDValue V = convertBuildVecZextToZext(N))
19224     return V;
19225 
19226   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
19227     return V;
19228 
19229   if (SDValue V = reduceBuildVecTruncToBitCast(N))
19230     return V;
19231 
19232   if (SDValue V = reduceBuildVecToShuffle(N))
19233     return V;
19234 
19235   return SDValue();
19236 }
19237 
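// Fold a CONCAT_VECTORS of scalar bitcasts (or undefs) into one wide
// BUILD_VECTOR of the scalars. For example, assuming v1f64 is not a legal
// type for the target:
//   (v2f64 concat_vectors (v1f64 bitcast (f64 X)), (v1f64 bitcast (f64 Y)))
//     --> (v2f64 build_vector (f64 X), (f64 Y))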
19238 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
19239   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19240   EVT OpVT = N->getOperand(0).getValueType();
19241 
19242   // If the operands are legal vectors, leave them alone.
19243   if (TLI.isTypeLegal(OpVT))
19244     return SDValue();
19245 
19246   SDLoc DL(N);
19247   EVT VT = N->getValueType(0);
19248   SmallVector<SDValue, 8> Ops;
19249 
19250   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
19251   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19252 
19253   // Keep track of what we encounter.
19254   bool AnyInteger = false;
19255   bool AnyFP = false;
19256   for (const SDValue &Op : N->ops()) {
19257     if (ISD::BITCAST == Op.getOpcode() &&
19258         !Op.getOperand(0).getValueType().isVector())
19259       Ops.push_back(Op.getOperand(0));
19260     else if (ISD::UNDEF == Op.getOpcode())
19261       Ops.push_back(ScalarUndef);
19262     else
19263       return SDValue();
19264 
19265     // Note whether we encounter an integer or floating point scalar.
19266     // If it's neither, bail out, it could be something weird like x86mmx.
19267     EVT LastOpVT = Ops.back().getValueType();
19268     if (LastOpVT.isFloatingPoint())
19269       AnyFP = true;
19270     else if (LastOpVT.isInteger())
19271       AnyInteger = true;
19272     else
19273       return SDValue();
19274   }
19275 
19276   // If any of the operands is a floating point scalar bitcast to a vector,
19277   // use floating point types throughout, and bitcast everything.
19278   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
19279   if (AnyFP) {
19280     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
19281     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
19282     if (AnyInteger) {
19283       for (SDValue &Op : Ops) {
19284         if (Op.getValueType() == SVT)
19285           continue;
19286         if (Op.isUndef())
19287           Op = ScalarUndef;
19288         else
19289           Op = DAG.getBitcast(SVT, Op);
19290       }
19291     }
19292   }
19293 
19294   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
19295                                VT.getSizeInBits() / SVT.getSizeInBits());
19296   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
19297 }
19298 
19299 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
19300 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
19301 // most two distinct vectors the same size as the result, attempt to turn this
19302 // into a legal shuffle.
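// For example:
//   concat (extract_subvec (v8i32 A), 4), (extract_subvec (v8i32 B), 0)
//     --> shuffle A, B, <4,5,6,7,8,9,10,11>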
19303 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
19304   EVT VT = N->getValueType(0);
19305   EVT OpVT = N->getOperand(0).getValueType();
19306 
19307   // We currently can't generate an appropriate shuffle for a scalable vector.
19308   if (VT.isScalableVector())
19309     return SDValue();
19310 
19311   int NumElts = VT.getVectorNumElements();
19312   int NumOpElts = OpVT.getVectorNumElements();
19313 
19314   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
19315   SmallVector<int, 8> Mask;
19316 
19317   for (SDValue Op : N->ops()) {
19318     Op = peekThroughBitcasts(Op);
19319 
19320     // UNDEF nodes convert to UNDEF shuffle mask values.
19321     if (Op.isUndef()) {
19322       Mask.append((unsigned)NumOpElts, -1);
19323       continue;
19324     }
19325 
19326     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19327       return SDValue();
19328 
19329     // What vector are we extracting the subvector from and at what index?
19330     SDValue ExtVec = Op.getOperand(0);
19331     int ExtIdx = Op.getConstantOperandVal(1);
19332 
19333     // We want the EVT of the original extraction to correctly scale the
19334     // extraction index.
19335     EVT ExtVT = ExtVec.getValueType();
19336     ExtVec = peekThroughBitcasts(ExtVec);
19337 
19338     // UNDEF nodes convert to UNDEF shuffle mask values.
19339     if (ExtVec.isUndef()) {
19340       Mask.append((unsigned)NumOpElts, -1);
19341       continue;
19342     }
19343 
19344     // Ensure that we are extracting a subvector from a vector the same
19345     // size as the result.
19346     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
19347       return SDValue();
19348 
19349     // Scale the subvector index to account for any bitcast.
19350     int NumExtElts = ExtVT.getVectorNumElements();
19351     if (0 == (NumExtElts % NumElts))
19352       ExtIdx /= (NumExtElts / NumElts);
19353     else if (0 == (NumElts % NumExtElts))
19354       ExtIdx *= (NumElts / NumExtElts);
19355     else
19356       return SDValue();
19357 
19358     // At most we can reference 2 inputs in the final shuffle.
19359     if (SV0.isUndef() || SV0 == ExtVec) {
19360       SV0 = ExtVec;
19361       for (int i = 0; i != NumOpElts; ++i)
19362         Mask.push_back(i + ExtIdx);
19363     } else if (SV1.isUndef() || SV1 == ExtVec) {
19364       SV1 = ExtVec;
19365       for (int i = 0; i != NumOpElts; ++i)
19366         Mask.push_back(i + ExtIdx + NumElts);
19367     } else {
19368       return SDValue();
19369     }
19370   }
19371 
19372   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19373   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
19374                                      DAG.getBitcast(VT, SV1), Mask, DAG);
19375 }
19376 
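// Fold: concat (cast X), (cast Y)... -> cast (concat X, Y...). For example,
// assuming the wide cast and the types involved are legal for the target:
//   (v8f32 concat (v4f32 sint_to_fp (v4i32 A)), (v4f32 sint_to_fp (v4i32 B)))
//     --> (v8f32 sint_to_fp (v8i32 concat (v4i32 A), (v4i32 B)))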
19377 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
19378   unsigned CastOpcode = N->getOperand(0).getOpcode();
19379   switch (CastOpcode) {
19380   case ISD::SINT_TO_FP:
19381   case ISD::UINT_TO_FP:
19382   case ISD::FP_TO_SINT:
19383   case ISD::FP_TO_UINT:
19384     // TODO: Allow more opcodes?
19385     //  case ISD::BITCAST:
19386     //  case ISD::TRUNCATE:
19387     //  case ISD::ZERO_EXTEND:
19388     //  case ISD::SIGN_EXTEND:
19389     //  case ISD::FP_EXTEND:
19390     break;
19391   default:
19392     return SDValue();
19393   }
19394 
19395   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
19396   if (!SrcVT.isVector())
19397     return SDValue();
19398 
19399   // All operands of the concat must be the same kind of cast from the same
19400   // source type.
19401   SmallVector<SDValue, 4> SrcOps;
19402   for (SDValue Op : N->ops()) {
19403     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
19404         Op.getOperand(0).getValueType() != SrcVT)
19405       return SDValue();
19406     SrcOps.push_back(Op.getOperand(0));
19407   }
19408 
  // The wider cast must be supported by the target. This is unusual because
  // the type used to check operation support depends on the opcode. In
  // addition, check the other type in the cast to make sure this is really
  // legal.
19412   EVT VT = N->getValueType(0);
19413   EVT SrcEltVT = SrcVT.getVectorElementType();
19414   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
19415   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
19416   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19417   switch (CastOpcode) {
19418   case ISD::SINT_TO_FP:
19419   case ISD::UINT_TO_FP:
19420     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
19421         !TLI.isTypeLegal(VT))
19422       return SDValue();
19423     break;
19424   case ISD::FP_TO_SINT:
19425   case ISD::FP_TO_UINT:
19426     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
19427         !TLI.isTypeLegal(ConcatSrcVT))
19428       return SDValue();
19429     break;
19430   default:
19431     llvm_unreachable("Unexpected cast opcode");
19432   }
19433 
19434   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
19435   SDLoc DL(N);
19436   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
19437   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
19438 }
19439 
19440 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
19441   // If we only have one input vector, we don't need to do any concatenation.
19442   if (N->getNumOperands() == 1)
19443     return N->getOperand(0);
19444 
19445   // Check if all of the operands are undefs.
19446   EVT VT = N->getValueType(0);
19447   if (ISD::allOperandsUndef(N))
19448     return DAG.getUNDEF(VT);
19449 
19450   // Optimize concat_vectors where all but the first of the vectors are undef.
19451   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
19452         return Op.isUndef();
19453       })) {
19454     SDValue In = N->getOperand(0);
19455     assert(In.getValueType().isVector() && "Must concat vectors");
19456 
19457     // If the input is a concat_vectors, just make a larger concat by padding
19458     // with smaller undefs.
19459     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
19460       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
19461       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
19462       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
19463       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19464     }
19465 
19466     SDValue Scalar = peekThroughOneUseBitcasts(In);
19467 
19468     // concat_vectors(scalar_to_vector(scalar), undef) ->
19469     //     scalar_to_vector(scalar)
19470     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
19471          Scalar.hasOneUse()) {
19472       EVT SVT = Scalar.getValueType().getVectorElementType();
19473       if (SVT == Scalar.getOperand(0).getValueType())
19474         Scalar = Scalar.getOperand(0);
19475     }
19476 
19477     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
19478     if (!Scalar.getValueType().isVector()) {
19479       // If the bitcast type isn't legal, it might be a trunc of a legal type;
19480       // look through the trunc so we can still do the transform:
19481       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
19482       if (Scalar->getOpcode() == ISD::TRUNCATE &&
19483           !TLI.isTypeLegal(Scalar.getValueType()) &&
19484           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
19485         Scalar = Scalar->getOperand(0);
19486 
19487       EVT SclTy = Scalar.getValueType();
19488 
19489       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
19490         return SDValue();
19491 
19492       // Bail out if the vector size is not a multiple of the scalar size.
19493       if (VT.getSizeInBits() % SclTy.getSizeInBits())
19494         return SDValue();
19495 
19496       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
19497       if (VNTNumElms < 2)
19498         return SDValue();
19499 
19500       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
19501       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
19502         return SDValue();
19503 
19504       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
19505       return DAG.getBitcast(VT, Res);
19506     }
19507   }
19508 
19509   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
19510   // We have already tested above for an UNDEF only concatenation.
19511   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
19512   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
19513   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
19514     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
19515   };
19516   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
19517     SmallVector<SDValue, 8> Opnds;
19518     EVT SVT = VT.getScalarType();
19519 
19520     EVT MinVT = SVT;
19521     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTOR nodes are built from integers, they may have
      // different operand types. Get the smallest type and truncate all
      // operands to it.
19524       bool FoundMinVT = false;
19525       for (const SDValue &Op : N->ops())
19526         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19527           EVT OpSVT = Op.getOperand(0).getValueType();
19528           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
19529           FoundMinVT = true;
19530         }
19531       assert(FoundMinVT && "Concat vector type mismatch");
19532     }
19533 
19534     for (const SDValue &Op : N->ops()) {
19535       EVT OpVT = Op.getValueType();
19536       unsigned NumElts = OpVT.getVectorNumElements();
19537 
19538       if (ISD::UNDEF == Op.getOpcode())
19539         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
19540 
19541       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
19542         if (SVT.isFloatingPoint()) {
19543           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
19544           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
19545         } else {
19546           for (unsigned i = 0; i != NumElts; ++i)
19547             Opnds.push_back(
19548                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
19549         }
19550       }
19551     }
19552 
19553     assert(VT.getVectorNumElements() == Opnds.size() &&
19554            "Concat vector type mismatch");
19555     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
19556   }
19557 
19558   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
19559   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
19560     return V;
19561 
19562   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
19563   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
19564     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
19565       return V;
19566 
19567   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
19568     return V;
19569 
19570   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
19571   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
19573   // at the exact same location.
19574   //
19575   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
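  // For example:
  //   (v8i32 concat (v4i32 extract_subvec (v8i32 V), 0),
  //                 (v4i32 extract_subvec (v8i32 V), 4)) --> (v8i32 V)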
19576   SDValue SingleSource = SDValue();
19577   unsigned PartNumElem =
19578       N->getOperand(0).getValueType().getVectorMinNumElements();
19579 
19580   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
19581     SDValue Op = N->getOperand(i);
19582 
19583     if (Op.isUndef())
19584       continue;
19585 
19586     // Check if this is the identity extract:
19587     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
19588       return SDValue();
19589 
19590     // Find the single incoming vector for the extract_subvector.
19591     if (SingleSource.getNode()) {
19592       if (Op.getOperand(0) != SingleSource)
19593         return SDValue();
19594     } else {
19595       SingleSource = Op.getOperand(0);
19596 
19597       // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
19600       if (SingleSource.getValueType() != N->getValueType(0))
19601         return SDValue();
19602     }
19603 
19604     // Check that we are reading from the identity index.
19605     unsigned IdentityIndex = i * PartNumElem;
19606     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
19607       return SDValue();
19608   }
19609 
19610   if (SingleSource.getNode())
19611     return SingleSource;
19612 
19613   return SDValue();
19614 }
19615 
19616 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
19617 // if the subvector can be sourced for free.
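// For example, with SubVT = v4i32 and Index = 4:
//   (insert_subvector ?, X, 4)                 --> X
//   (concat_vectors (v4i32 A), (v4i32 B), ...) --> B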
19618 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
19619   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
19620       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
19621     return V.getOperand(1);
19622   }
19623   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
19624   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
19625       V.getOperand(0).getValueType() == SubVT &&
19626       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
19627     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
19628     return V.getOperand(SubIdx);
19629   }
19630   return SDValue();
19631 }
19632 
19633 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
19634                                               SelectionDAG &DAG,
19635                                               bool LegalOperations) {
19636   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19637   SDValue BinOp = Extract->getOperand(0);
19638   unsigned BinOpcode = BinOp.getOpcode();
19639   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
19640     return SDValue();
19641 
19642   EVT VecVT = BinOp.getValueType();
19643   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
19644   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
19645     return SDValue();
19646 
19647   SDValue Index = Extract->getOperand(1);
19648   EVT SubVT = Extract->getValueType(0);
19649   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
19650     return SDValue();
19651 
19652   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
19653   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
19654 
19655   // TODO: We could handle the case where only 1 operand is being inserted by
19656   //       creating an extract of the other operand, but that requires checking
19657   //       number of uses and/or costs.
19658   if (!Sub0 || !Sub1)
19659     return SDValue();
19660 
19661   // We are inserting both operands of the wide binop only to extract back
19662   // to the narrow vector size. Eliminate all of the insert/extract:
19663   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
19664   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
19665                      BinOp->getFlags());
19666 }
19667 
19668 /// If we are extracting a subvector produced by a wide binary operator try
19669 /// to use a narrow binary operator and/or avoid concatenation and extraction.
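/// For example, on a target where the 256-bit integer op would be expanded
/// but the 128-bit version is legal (an AVX1-style target):
///   (v4i32 extract_subvector (v8i32 and A, B), 4)
///     --> (v4i32 and (extract_subvector A, 4), (extract_subvector B, 4))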
19670 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
19671                                           bool LegalOperations) {
19672   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
19673   // some of these bailouts with other transforms.
19674 
19675   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
19676     return V;
19677 
19678   // The extract index must be a constant, so we can map it to a concat operand.
19679   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
19680   if (!ExtractIndexC)
19681     return SDValue();
19682 
19683   // We are looking for an optionally bitcasted wide vector binary operator
19684   // feeding an extract subvector.
19685   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19686   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
19687   unsigned BOpcode = BinOp.getOpcode();
19688   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
19689     return SDValue();
19690 
19691   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
19692   // reduced to the unary fneg when it is visited, and we probably want to deal
19693   // with fneg in a target-specific way.
19694   if (BOpcode == ISD::FSUB) {
19695     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
19696     if (C && C->getValueAPF().isNegZero())
19697       return SDValue();
19698   }
19699 
19700   // The binop must be a vector type, so we can extract some fraction of it.
19701   EVT WideBVT = BinOp.getValueType();
19702   // The optimisations below currently assume we are dealing with fixed length
19703   // vectors. It is possible to add support for scalable vectors, but at the
19704   // moment we've done no analysis to prove whether they are profitable or not.
19705   if (!WideBVT.isFixedLengthVector())
19706     return SDValue();
19707 
19708   EVT VT = Extract->getValueType(0);
19709   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
19710   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
19711          "Extract index is not a multiple of the vector length.");
19712 
19713   // Bail out if this is not a proper multiple width extraction.
19714   unsigned WideWidth = WideBVT.getSizeInBits();
19715   unsigned NarrowWidth = VT.getSizeInBits();
19716   if (WideWidth % NarrowWidth != 0)
19717     return SDValue();
19718 
19719   // Bail out if we are extracting a fraction of a single operation. This can
19720   // occur because we potentially looked through a bitcast of the binop.
19721   unsigned NarrowingRatio = WideWidth / NarrowWidth;
19722   unsigned WideNumElts = WideBVT.getVectorNumElements();
19723   if (WideNumElts % NarrowingRatio != 0)
19724     return SDValue();
19725 
19726   // Bail out if the target does not support a narrower version of the binop.
19727   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
19728                                    WideNumElts / NarrowingRatio);
19729   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
19730     return SDValue();
19731 
19732   // If extraction is cheap, we don't need to look at the binop operands
19733   // for concat ops. The narrow binop alone makes this transform profitable.
19734   // We can't just reuse the original extract index operand because we may have
19735   // bitcasted.
19736   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
19737   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
19738   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
19739       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
19740     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
19741     SDLoc DL(Extract);
19742     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19743     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19744                             BinOp.getOperand(0), NewExtIndex);
19745     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19746                             BinOp.getOperand(1), NewExtIndex);
19747     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
19748                                       BinOp.getNode()->getFlags());
19749     return DAG.getBitcast(VT, NarrowBinOp);
19750   }
19751 
19752   // Only handle the case where we are doubling and then halving. A larger ratio
19753   // may require more than two narrow binops to replace the wide binop.
19754   if (NarrowingRatio != 2)
19755     return SDValue();
19756 
19757   // TODO: The motivating case for this transform is an x86 AVX1 target. That
19758   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
19759   // flavors, but no other 256-bit integer support. This could be extended to
19760   // handle any binop, but that may require fixing/adding other folds to avoid
19761   // codegen regressions.
19762   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
19763     return SDValue();
19764 
19765   // We need at least one concatenation operation of a binop operand to make
19766   // this transform worthwhile. The concat must double the input vector sizes.
19767   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
19768     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
19769       return V.getOperand(ConcatOpNum);
19770     return SDValue();
19771   };
19772   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
19773   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
19774 
19775   if (SubVecL || SubVecR) {
19776     // If a binop operand was not the result of a concat, we must extract a
19777     // half-sized operand for our new narrow binop:
19778     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
19779     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
19780     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
19781     SDLoc DL(Extract);
19782     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
19783     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
19784                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19785                                       BinOp.getOperand(0), IndexC);
19786 
19787     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
19788                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
19789                                       BinOp.getOperand(1), IndexC);
19790 
19791     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
19792     return DAG.getBitcast(VT, NarrowBinOp);
19793   }
19794 
19795   return SDValue();
19796 }
19797 
19798 /// If we are extracting a subvector from a wide vector load, convert to a
19799 /// narrow load to eliminate the extraction:
19800 /// (extract_subvector (load wide vector)) --> (load narrow vector)
19801 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
19802   // TODO: Add support for big-endian. The offset calculation must be adjusted.
19803   if (DAG.getDataLayout().isBigEndian())
19804     return SDValue();
19805 
19806   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
19807   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
  if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
19810     return SDValue();
19811 
  EVT VT = Extract->getValueType(0);
19814 
19815   // We can only create byte sized loads.
19816   if (!VT.isByteSized())
19817     return SDValue();
19818 
19819   unsigned Index = ExtIdx->getZExtValue();
19820   unsigned NumElts = VT.getVectorMinNumElements();
19821 
19822   // The definition of EXTRACT_SUBVECTOR states that the index must be a
19823   // multiple of the minimum number of elements in the result type.
19824   assert(Index % NumElts == 0 && "The extract subvector index is not a "
19825                                  "multiple of the result's element count");
19826 
19827   // It's fine to use TypeSize here as we know the offset will not be negative.
19828   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
19829 
  // Allow targets to opt-out.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19831   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
19832     return SDValue();
19833 
19834   // The narrow load will be offset from the base address of the old load if
19835   // we are extracting from something besides index 0 (little-endian).
19836   SDLoc DL(Extract);
19837 
19838   // TODO: Use "BaseIndexOffset" to make this more effective.
19839   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
19840 
19841   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
19842   MachineFunction &MF = DAG.getMachineFunction();
19843   MachineMemOperand *MMO;
19844   if (Offset.isScalable()) {
19845     MachinePointerInfo MPI =
19846         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
19847     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
19848   } else
19849     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(),
19850                                   StoreSize);
19851 
19852   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
19853   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
19854   return NewLd;
19855 }
19856 
19857 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
19858   EVT NVT = N->getValueType(0);
19859   SDValue V = N->getOperand(0);
19860   uint64_t ExtIdx = N->getConstantOperandVal(1);
19861 
19862   // Extract from UNDEF is UNDEF.
19863   if (V.isUndef())
19864     return DAG.getUNDEF(NVT);
19865 
19866   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
19867     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
19868       return NarrowLoad;
19869 
19870   // Combine an extract of an extract into a single extract_subvector.
19871   // ext (ext X, C), 0 --> ext X, C
19872   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
19873     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
19874                                     V.getConstantOperandVal(1)) &&
19875         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
19876       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
19877                          V.getOperand(1));
19878     }
19879   }
19880 
19881   // Try to move vector bitcast after extract_subv by scaling extraction index:
19882   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
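  // For example:
  //   (v2i64 extract_subv (v4i64 bitcast (v8i32 X)), 2)
  //     --> (v2i64 bitcast (v4i32 extract_subv (v8i32 X), 4))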
19883   if (V.getOpcode() == ISD::BITCAST &&
19884       V.getOperand(0).getValueType().isVector()) {
19885     SDValue SrcOp = V.getOperand(0);
19886     EVT SrcVT = SrcOp.getValueType();
19887     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
19888     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
19889     if ((SrcNumElts % DestNumElts) == 0) {
19890       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
19891       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
19892       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
19893                                       NewExtEC);
19894       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19895         SDLoc DL(N);
19896         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
19897         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19898                                          V.getOperand(0), NewIndex);
19899         return DAG.getBitcast(NVT, NewExtract);
19900       }
19901     }
19902     if ((DestNumElts % SrcNumElts) == 0) {
19903       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
19904       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
19905         ElementCount NewExtEC =
19906             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
19907         EVT ScalarVT = SrcVT.getScalarType();
19908         if ((ExtIdx % DestSrcRatio) == 0) {
19909           SDLoc DL(N);
19910           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
19911           EVT NewExtVT =
19912               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
19913           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
19914             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19915             SDValue NewExtract =
19916                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
19917                             V.getOperand(0), NewIndex);
19918             return DAG.getBitcast(NVT, NewExtract);
19919           }
19920           if (NewExtEC.isScalar() &&
19921               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
19922             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
19923             SDValue NewExtract =
19924                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
19925                             V.getOperand(0), NewIndex);
19926             return DAG.getBitcast(NVT, NewExtract);
19927           }
19928         }
19929       }
19930     }
19931   }
19932 
19933   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
19934     unsigned ExtNumElts = NVT.getVectorMinNumElements();
19935     EVT ConcatSrcVT = V.getOperand(0).getValueType();
19936     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
19937            "Concat and extract subvector do not change element type");
    assert((ExtIdx % ExtNumElts) == 0 &&
           "Extract index is not a multiple of the extracted vector length.");
19940 
19941     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
19942     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
19943 
19944     // If the concatenated source types match this extract, it's a direct
19945     // simplification:
19946     // extract_subvec (concat V1, V2, ...), i --> Vi
19947     if (ConcatSrcNumElts == ExtNumElts)
19948       return V.getOperand(ConcatOpIdx);
19949 
19950     // If the concatenated source vectors are a multiple length of this extract,
19951     // then extract a fraction of one of those source vectors directly from a
19952     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec (v8i8 Y), 6
19955     if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
19956       SDLoc DL(N);
19957       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
19958       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
19959              "Trying to extract from >1 concat operand?");
      assert(NewExtIdx % ExtNumElts == 0 &&
             "Extract index is not a multiple of the extracted vector length.");
19962       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
19963       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
19964                          V.getOperand(ConcatOpIdx), NewIndexC);
19965     }
19966   }
19967 
19968   V = peekThroughBitcasts(V);
19969 
  // If the input is a build vector, try to make a smaller build vector.
19971   if (V.getOpcode() == ISD::BUILD_VECTOR) {
19972     EVT InVT = V.getValueType();
19973     unsigned ExtractSize = NVT.getSizeInBits();
19974     unsigned EltSize = InVT.getScalarSizeInBits();
19975     // Only do this if we won't split any elements.
19976     if (ExtractSize % EltSize == 0) {
19977       unsigned NumElems = ExtractSize / EltSize;
19978       EVT EltVT = InVT.getVectorElementType();
19979       EVT ExtractVT =
19980           NumElems == 1 ? EltVT
19981                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
19982       if ((Level < AfterLegalizeDAG ||
19983            (NumElems == 1 ||
19984             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
19985           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
19986         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
19987 
19988         if (NumElems == 1) {
19989           SDValue Src = V->getOperand(IdxVal);
19990           if (EltVT != Src.getValueType())
            Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
19992           return DAG.getBitcast(NVT, Src);
19993         }
19994 
19995         // Extract the pieces from the original build_vector.
19996         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
19997                                               V->ops().slice(IdxVal, NumElems));
19998         return DAG.getBitcast(NVT, BuildVec);
19999       }
20000     }
20001   }
20002 
20003   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are the same size.
20006     EVT SmallVT = V.getOperand(1).getValueType();
20007     if (!NVT.bitsEq(SmallVT))
20008       return SDValue();
20009 
20010     // Combine:
20011     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
20012     // Into:
20013     //    indices are equal or bit offsets are equal => V1
20014     //    otherwise => (extract_subvec V1, ExtIdx)
20015     uint64_t InsIdx = V.getConstantOperandVal(2);
20016     if (InsIdx * SmallVT.getScalarSizeInBits() ==
20017         ExtIdx * NVT.getScalarSizeInBits())
20018       return DAG.getBitcast(NVT, V.getOperand(1));
20019     return DAG.getNode(
20020         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
20021         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
20022         N->getOperand(1));
20023   }
20024 
20025   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
20026     return NarrowBOp;
20027 
20028   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20029     return SDValue(N, 0);
20030 
20031   return SDValue();
20032 }
20033 
20034 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
20035 /// followed by concatenation. Narrow vector ops may have better performance
20036 /// than wide ops, and this can unlock further narrowing of other vector ops.
20037 /// Targets can invert this transform later if it is not profitable.
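/// For example:
///   shuffle (concat X, undef), (concat Y, undef), <0,8,1,9,2,10,3,11>
///     --> concat (shuffle X, Y, <0,4,1,5>), (shuffle X, Y, <2,6,3,7>)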
20038 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
20039                                          SelectionDAG &DAG) {
20040   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
20041   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
20042       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
20043       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
20044     return SDValue();
20045 
20046   // Split the wide shuffle mask into halves. Any mask element that is accessing
20047   // operand 1 is offset down to account for narrowing of the vectors.
20048   ArrayRef<int> Mask = Shuf->getMask();
20049   EVT VT = Shuf->getValueType(0);
20050   unsigned NumElts = VT.getVectorNumElements();
20051   unsigned HalfNumElts = NumElts / 2;
20052   SmallVector<int, 16> Mask0(HalfNumElts, -1);
20053   SmallVector<int, 16> Mask1(HalfNumElts, -1);
20054   for (unsigned i = 0; i != NumElts; ++i) {
20055     if (Mask[i] == -1)
20056       continue;
20057     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
20058     if (i < HalfNumElts)
20059       Mask0[i] = M;
20060     else
20061       Mask1[i - HalfNumElts] = M;
20062   }
20063 
20064   // Ask the target if this is a valid transform.
20065   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
20066   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
20067                                 HalfNumElts);
20068   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
20069       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
20070     return SDValue();
20071 
20072   // shuffle (concat X, undef), (concat Y, undef), Mask -->
20073   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
20074   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
20075   SDLoc DL(Shuf);
20076   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
20077   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
20078   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
20079 }
20080 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
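// For example:
//   shuffle (concat A, B), (concat C, D), <4,5,2,3> --> concat C, B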
20083 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
20084   EVT VT = N->getValueType(0);
20085   unsigned NumElts = VT.getVectorNumElements();
20086 
20087   SDValue N0 = N->getOperand(0);
20088   SDValue N1 = N->getOperand(1);
20089   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20090   ArrayRef<int> Mask = SVN->getMask();
20091 
20092   SmallVector<SDValue, 4> Ops;
20093   EVT ConcatVT = N0.getOperand(0).getValueType();
20094   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
20095   unsigned NumConcats = NumElts / NumElemsPerConcat;
20096 
20097   auto IsUndefMaskElt = [](int i) { return i == -1; };
20098 
20099   // Special case: shuffle(concat(A,B)) can be more efficiently represented
20100   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
20101   // half vector elements.
20102   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
20103       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
20104                    IsUndefMaskElt)) {
20105     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
20106                               N0.getOperand(1),
20107                               Mask.slice(0, NumElemsPerConcat));
20108     N1 = DAG.getUNDEF(ConcatVT);
20109     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
20110   }
20111 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
20114   for (unsigned I = 0; I != NumConcats; ++I) {
20115     unsigned Begin = I * NumElemsPerConcat;
20116     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
20117 
20118     // Make sure we're dealing with a copy.
20119     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
20120       Ops.push_back(DAG.getUNDEF(ConcatVT));
20121       continue;
20122     }
20123 
20124     int OpIdx = -1;
20125     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
20126       if (IsUndefMaskElt(SubMask[i]))
20127         continue;
20128       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
20129         return SDValue();
20130       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
20131       if (0 <= OpIdx && EltOpIdx != OpIdx)
20132         return SDValue();
20133       OpIdx = EltOpIdx;
20134     }
20135     assert(0 <= OpIdx && "Unknown concat_vectors op");
20136 
20137     if (OpIdx < (int)N0.getNumOperands())
20138       Ops.push_back(N0.getOperand(OpIdx));
20139     else
20140       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
20141   }
20142 
20143   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
20144 }
20145 
20146 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20147 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20148 //
20149 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
20150 // a simplification in some sense, but it isn't appropriate in general: some
20151 // BUILD_VECTORs are substantially cheaper than others. The general case
20152 // of a BUILD_VECTOR requires inserting each element individually (or
20153 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
20154 // all constants is a single constant pool load.  A BUILD_VECTOR where each
20155 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
20156 // are undef lowers to a small number of element insertions.
20157 //
20158 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
20159 // We don't fold shuffles where one side is a non-zero constant, and we don't
20160 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
20161 // non-constant operands. This seems to work out reasonably well in practice.
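// For example:
//   shuffle (build_vector A, B, C, D), undef, <2,u,0,1>
//     --> build_vector C, undef, A, B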
20162 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
20163                                        SelectionDAG &DAG,
20164                                        const TargetLowering &TLI) {
20165   EVT VT = SVN->getValueType(0);
20166   unsigned NumElts = VT.getVectorNumElements();
20167   SDValue N0 = SVN->getOperand(0);
20168   SDValue N1 = SVN->getOperand(1);
20169 
20170   if (!N0->hasOneUse())
20171     return SDValue();
20172 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
20175   if (!N1.isUndef()) {
20176     if (!N1->hasOneUse())
20177       return SDValue();
20178 
20179     bool N0AnyConst = isAnyConstantBuildVector(N0);
20180     bool N1AnyConst = isAnyConstantBuildVector(N1);
20181     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
20182       return SDValue();
20183     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
20184       return SDValue();
20185   }
20186 
20187   // If both inputs are splats of the same value then we can safely merge this
20188   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
20189   bool IsSplat = false;
20190   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
20191   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
20192   if (BV0 && BV1)
20193     if (SDValue Splat0 = BV0->getSplatValue())
20194       IsSplat = (Splat0 == BV1->getSplatValue());
20195 
20196   SmallVector<SDValue, 8> Ops;
20197   SmallSet<SDValue, 16> DuplicateOps;
20198   for (int M : SVN->getMask()) {
20199     SDValue Op = DAG.getUNDEF(VT.getScalarType());
20200     if (M >= 0) {
20201       int Idx = M < (int)NumElts ? M : M - NumElts;
20202       SDValue &S = (M < (int)NumElts ? N0 : N1);
20203       if (S.getOpcode() == ISD::BUILD_VECTOR) {
20204         Op = S.getOperand(Idx);
20205       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
20206         SDValue Op0 = S.getOperand(0);
20207         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
20208       } else {
20209         // Operand can't be combined - bail out.
20210         return SDValue();
20211       }
20212     }
20213 
20214     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
20215     // generating a splat; semantically, this is fine, but it's likely to
20216     // generate low-quality code if the target can't reconstruct an appropriate
20217     // shuffle.
20218     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
20219       if (!IsSplat && !DuplicateOps.insert(Op).second)
20220         return SDValue();
20221 
20222     Ops.push_back(Op);
20223   }
20224 
20225   // BUILD_VECTOR requires all inputs to be of the same type, find the
20226   // maximum type and extend them all.
20227   EVT SVT = VT.getScalarType();
20228   if (SVT.isInteger())
20229     for (SDValue &Op : Ops)
20230       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
20231   if (SVT != VT.getScalarType())
20232     for (SDValue &Op : Ops)
20233       Op = TLI.isZExtFree(Op.getValueType(), SVT)
20234                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
20235                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
20236   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
20237 }
20238 
20239 // Match shuffles that can be converted to any_vector_extend_in_reg.
20240 // This is often generated during legalization.
20241 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
20242 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
20243 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
20244                                             SelectionDAG &DAG,
20245                                             const TargetLowering &TLI,
20246                                             bool LegalOperations) {
20247   EVT VT = SVN->getValueType(0);
20248   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20249 
20250   // TODO Add support for big-endian when we have a test case.
20251   if (!VT.isInteger() || IsBigEndian)
20252     return SDValue();
20253 
20254   unsigned NumElts = VT.getVectorNumElements();
20255   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20256   ArrayRef<int> Mask = SVN->getMask();
20257   SDValue N0 = SVN->getOperand(0);
20258 
20259   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
20260   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
20261     for (unsigned i = 0; i != NumElts; ++i) {
20262       if (Mask[i] < 0)
20263         continue;
20264       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
20265         continue;
20266       return false;
20267     }
20268     return true;
20269   };
20270 
  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
  // power-of-2 extensions as they are the most likely.
20273   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
20274     // Check for non power of 2 vector sizes
20275     if (NumElts % Scale != 0)
20276       continue;
20277     if (!isAnyExtend(Scale))
20278       continue;
20279 
20280     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
20281     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
20282     // Never create an illegal type. Only create unsupported operations if we
20283     // are pre-legalization.
20284     if (TLI.isTypeLegal(OutVT))
20285       if (!LegalOperations ||
20286           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
20287         return DAG.getBitcast(VT,
20288                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
20289                                           SDLoc(SVN), OutVT, N0));
20290   }
20291 
20292   return SDValue();
20293 }
20294 
20295 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
20296 // each source element of a large type into the lowest elements of a smaller
20297 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
20300 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
20301                                         SelectionDAG &DAG) {
20302   EVT VT = SVN->getValueType(0);
20303   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
20304 
20305   // TODO Add support for big-endian when we have a test case.
20306   if (!VT.isInteger() || IsBigEndian)
20307     return SDValue();
20308 
20309   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
20310 
20311   unsigned Opcode = N0.getOpcode();
20312   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
20313       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
20314       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
20315     return SDValue();
20316 
20317   SDValue N00 = N0.getOperand(0);
20318   ArrayRef<int> Mask = SVN->getMask();
20319   unsigned NumElts = VT.getVectorNumElements();
20320   unsigned EltSizeInBits = VT.getScalarSizeInBits();
20321   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
20322   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
20323 
20324   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
20325     return SDValue();
20326   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
20327 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
20329   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
20330   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
20331   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
20332     for (unsigned i = 0; i != NumElts; ++i) {
20333       if (Mask[i] < 0)
20334         continue;
20335       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
20336         continue;
20337       return false;
20338     }
20339     return true;
20340   };
20341 
20342   // At the moment we just handle the case where we've truncated back to the
20343   // same size as before the extension.
20344   // TODO: handle more extension/truncation cases as cases arise.
20345   if (EltSizeInBits != ExtSrcSizeInBits)
20346     return SDValue();
20347 
20348   // We can remove *extend_vector_inreg only if the truncation happens at
20349   // the same scale as the extension.
20350   if (isTruncate(ExtScale))
20351     return DAG.getBitcast(VT, N00);
20352 
20353   return SDValue();
20354 }
20355 
20356 // Combine shuffles of splat-shuffles of the form:
20357 // shuffle (shuffle V, undef, splat-mask), undef, M
20358 // If splat-mask contains undef elements, we need to be careful about
20359 // introducing undef's in the folded mask which are not the result of composing
20360 // the masks of the shuffles.
20361 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
20362                                         SelectionDAG &DAG) {
20363   if (!Shuf->getOperand(1).isUndef())
20364     return SDValue();
20365   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20366   if (!Splat || !Splat->isSplat())
20367     return SDValue();
20368 
20369   ArrayRef<int> ShufMask = Shuf->getMask();
20370   ArrayRef<int> SplatMask = Splat->getMask();
20371   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
20372 
20373   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
20374   // every undef mask element in the splat-shuffle has a corresponding undef
20375   // element in the user-shuffle's mask or if the composition of mask elements
20376   // would result in undef.
20377   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
20378   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
  //   In this case it is not legal to simplify to the splat-shuffle because we
  //   may be exposing to the users of the shuffle an undef element at index 1
  //   which was not there before the combine.
20382   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
20383   //   In this case the composition of masks yields SplatMask, so it's ok to
20384   //   simplify to the splat-shuffle.
20385   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
20386   //   In this case the composed mask includes all undef elements of SplatMask
20387   //   and in addition sets element zero to undef. It is safe to simplify to
20388   //   the splat-shuffle.
20389   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
20390                                        ArrayRef<int> SplatMask) {
20391     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
20392       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
20393           SplatMask[UserMask[i]] != -1)
20394         return false;
20395     return true;
20396   };
20397   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
20398     return Shuf->getOperand(0);
20399 
20400   // Create a new shuffle with a mask that is composed of the two shuffles'
20401   // masks.
20402   SmallVector<int, 32> NewMask;
20403   for (int Idx : ShufMask)
20404     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
20405 
20406   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
20407                               Splat->getOperand(0), Splat->getOperand(1),
20408                               NewMask);
20409 }
20410 
20411 /// Combine shuffle of shuffle of the form:
20412 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
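/// For example:
///   shuf (shuf X, undef, <3,u,3,u>), undef, <0,2,u,2>
///     --> shuf X, undef, <3,3,u,3>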
20413 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
20414                                      SelectionDAG &DAG) {
20415   if (!OuterShuf->getOperand(1).isUndef())
20416     return SDValue();
20417   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
20418   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
20419     return SDValue();
20420 
20421   ArrayRef<int> OuterMask = OuterShuf->getMask();
20422   ArrayRef<int> InnerMask = InnerShuf->getMask();
20423   unsigned NumElts = OuterMask.size();
20424   assert(NumElts == InnerMask.size() && "Mask length mismatch");
20425   SmallVector<int, 32> CombinedMask(NumElts, -1);
20426   int SplatIndex = -1;
20427   for (unsigned i = 0; i != NumElts; ++i) {
20428     // Undef lanes remain undef.
20429     int OuterMaskElt = OuterMask[i];
20430     if (OuterMaskElt == -1)
20431       continue;
20432 
20433     // Peek through the shuffle masks to get the underlying source element.
20434     int InnerMaskElt = InnerMask[OuterMaskElt];
20435     if (InnerMaskElt == -1)
20436       continue;
20437 
20438     // Initialize the splatted element.
20439     if (SplatIndex == -1)
20440       SplatIndex = InnerMaskElt;
20441 
20442     // Non-matching index - this is not a splat.
20443     if (SplatIndex != InnerMaskElt)
20444       return SDValue();
20445 
20446     CombinedMask[i] = InnerMaskElt;
20447   }
20448   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
20449           getSplatIndex(CombinedMask) != -1) &&
20450          "Expected a splat mask");
20451 
20452   // TODO: The transform may be a win even if the mask is not legal.
20453   EVT VT = OuterShuf->getValueType(0);
20454   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
20455   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
20456     return SDValue();
20457 
20458   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
20459                               InnerShuf->getOperand(1), CombinedMask);
20460 }
20461 
20462 /// If the shuffle mask is taking exactly one element from the first vector
20463 /// operand and passing through all other elements from the second vector
20464 /// operand, return the index of the mask element that is choosing an element
20465 /// from the first operand. Otherwise, return -1.
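/// e.g. for 4 elements, Mask = [4,1,6,7] returns 1: only lane 1 takes an
/// element from operand 0 (element 1), and every other lane passes through
/// the same lane of operand 1.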
20466 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
20467   int MaskSize = Mask.size();
20468   int EltFromOp0 = -1;
20469   // TODO: This does not match if there are undef elements in the shuffle mask.
20470   // Should we ignore undefs in the shuffle mask instead? The trade-off is
20471   // removing an instruction (a shuffle), but losing the knowledge that some
20472   // vector lanes are not needed.
20473   for (int i = 0; i != MaskSize; ++i) {
20474     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
20475       // We're looking for a shuffle of exactly one element from operand 0.
20476       if (EltFromOp0 != -1)
20477         return -1;
20478       EltFromOp0 = i;
20479     } else if (Mask[i] != i + MaskSize) {
20480       // Nothing from operand 1 can change lanes.
20481       return -1;
20482     }
20483   }
20484   return EltFromOp0;
20485 }
20486 
20487 /// If a shuffle inserts exactly one element from a source vector operand into
20488 /// another vector operand and we can access the specified element as a scalar,
20489 /// then we can eliminate the shuffle.
20490 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
20491                                       SelectionDAG &DAG) {
20492   // First, check if we are taking one element of a vector and shuffling that
20493   // element into another vector.
20494   ArrayRef<int> Mask = Shuf->getMask();
20495   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
20496   SDValue Op0 = Shuf->getOperand(0);
20497   SDValue Op1 = Shuf->getOperand(1);
20498   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
20499   if (ShufOp0Index == -1) {
20500     // Commute mask and check again.
20501     ShuffleVectorSDNode::commuteMask(CommutedMask);
20502     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
20503     if (ShufOp0Index == -1)
20504       return SDValue();
20505     // Commute operands to match the commuted shuffle mask.
20506     std::swap(Op0, Op1);
20507     Mask = CommutedMask;
20508   }
20509 
20510   // The shuffle inserts exactly one element from operand 0 into operand 1.
20511   // Now see if we can access that element as a scalar via a real insert element
20512   // instruction.
20513   // TODO: We can try harder to locate the element as a scalar. Examples: it
20514   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
20515   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
20516          "Shuffle mask value must be from operand 0");
20517   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
20518     return SDValue();
20519 
20520   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
20521   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
20522     return SDValue();
20523 
20524   // There's an existing insertelement with constant insertion index, so we
20525   // don't need to check the legality/profitability of a replacement operation
20526   // that differs at most in the constant value. The target should be able to
20527   // lower any of those in a similar way. If not, legalization will expand this
20528   // to a scalar-to-vector plus shuffle.
20529   //
20530   // Note that the shuffle may move the scalar from the position that the insert
20531   // element used. Therefore, our new insert element occurs at the shuffle's
20532   // mask index value, not the insert's index value.
20533   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
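  // e.g. shuffle (insertelt v1, x, 0), v2, <4,5,0,7> --> insertelt v2, x, 2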
20534   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
20535   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
20536                      Op1, Op0.getOperand(1), NewInsIndex);
20537 }
20538 
20539 /// If we have a unary shuffle of a shuffle, see if it can be folded away
20540 /// completely. This has the potential to lose undef knowledge because the first
20541 /// shuffle may not have an undef mask element where the second one does. So
20542 /// only call this after doing simplifications based on demanded elements.
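/// e.g. shuf (shuf X, Y, [0,0,2,2]), undef, [1,0,3,2] folds to the inner
/// shuffle because every outer lane still selects the same underlying value.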
20543 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
20544   // shuf (shuf0 X, Y, Mask0), undef, Mask
20545   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
20546   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
20547     return SDValue();
20548 
20549   ArrayRef<int> Mask = Shuf->getMask();
20550   ArrayRef<int> Mask0 = Shuf0->getMask();
20551   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
20552     // Ignore undef elements.
20553     if (Mask[i] == -1)
20554       continue;
20555     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
20556 
20557     // Is the element of the shuffle operand chosen by this shuffle the same as
20558     // the element chosen by the shuffle operand itself?
20559     if (Mask0[Mask[i]] != Mask0[i])
20560       return SDValue();
20561   }
20562   // Every element of this shuffle is identical to the result of the previous
20563   // shuffle, so we can replace this value.
20564   return Shuf->getOperand(0);
20565 }
20566 
20567 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
20568   EVT VT = N->getValueType(0);
20569   unsigned NumElts = VT.getVectorNumElements();
20570 
20571   SDValue N0 = N->getOperand(0);
20572   SDValue N1 = N->getOperand(1);
20573 
20574   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
20575 
20576   // Canonicalize shuffle undef, undef -> undef
20577   if (N0.isUndef() && N1.isUndef())
20578     return DAG.getUNDEF(VT);
20579 
20580   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
20581 
20582   // Canonicalize shuffle v, v -> v, undef
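  // e.g. shuffle v, v, <0,5,2,7> --> shuffle v, undef, <0,1,2,3>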
20583   if (N0 == N1) {
20584     SmallVector<int, 8> NewMask;
20585     for (unsigned i = 0; i != NumElts; ++i) {
20586       int Idx = SVN->getMaskElt(i);
20587       if (Idx >= (int)NumElts) Idx -= NumElts;
20588       NewMask.push_back(Idx);
20589     }
20590     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
20591   }
20592 
20593   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
20594   if (N0.isUndef())
20595     return DAG.getCommutedVectorShuffle(*SVN);
20596 
20597   // Remove references to rhs if it is undef
20598   if (N1.isUndef()) {
20599     bool Changed = false;
20600     SmallVector<int, 8> NewMask;
20601     for (unsigned i = 0; i != NumElts; ++i) {
20602       int Idx = SVN->getMaskElt(i);
20603       if (Idx >= (int)NumElts) {
20604         Idx = -1;
20605         Changed = true;
20606       }
20607       NewMask.push_back(Idx);
20608     }
20609     if (Changed)
20610       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
20611   }
20612 
20613   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
20614     return InsElt;
20615 
20616   // A shuffle of a single vector that is a splatted value can always be folded.
20617   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
20618     return V;
20619 
20620   if (SDValue V = formSplatFromShuffles(SVN, DAG))
20621     return V;
20622 
20623   // If it is a splat, check if the argument vector is another splat or a
20624   // build_vector.
20625   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
20626     int SplatIndex = SVN->getSplatIndex();
20627     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
20628         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
20629       // splat (vector_bo L, R), Index -->
20630       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
20631       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
20632       SDLoc DL(N);
20633       EVT EltVT = VT.getScalarType();
20634       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
20635       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
20636       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
20637       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
20638                                   N0.getNode()->getFlags());
20639       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
20640       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
20641       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
20642     }
20643 
    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
20647     SDNode *V = N0.getNode();
20648     if (V->getOpcode() == ISD::BITCAST) {
20649       SDValue ConvInput = V->getOperand(0);
20650       if (ConvInput.getValueType().isVector() &&
20651           ConvInput.getValueType().getVectorNumElements() == NumElts)
20652         V = ConvInput.getNode();
20653     }
20654 
20655     if (V->getOpcode() == ISD::BUILD_VECTOR) {
20656       assert(V->getNumOperands() == NumElts &&
20657              "BUILD_VECTOR has wrong number of operands");
20658       SDValue Base;
20659       bool AllSame = true;
20660       for (unsigned i = 0; i != NumElts; ++i) {
20661         if (!V->getOperand(i).isUndef()) {
20662           Base = V->getOperand(i);
20663           break;
20664         }
20665       }
20666       // Splat of <u, u, u, u>, return <u, u, u, u>
20667       if (!Base.getNode())
20668         return N0;
20669       for (unsigned i = 0; i != NumElts; ++i) {
20670         if (V->getOperand(i) != Base) {
20671           AllSame = false;
20672           break;
20673         }
20674       }
20675       // Splat of <x, x, x, x>, return <x, x, x, x>
20676       if (AllSame)
20677         return N0;
20678 
20679       // Canonicalize any other splat as a build_vector.
20680       SDValue Splatted = V->getOperand(SplatIndex);
20681       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
20682       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
20683 
20684       // We may have jumped through bitcasts, so the type of the
20685       // BUILD_VECTOR may not match the type of the shuffle.
20686       if (V->getValueType(0) != VT)
20687         NewBV = DAG.getBitcast(VT, NewBV);
20688       return NewBV;
20689     }
20690   }
20691 
20692   // Simplify source operands based on shuffle mask.
20693   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
20694     return SDValue(N, 0);
20695 
20696   // This is intentionally placed after demanded elements simplification because
20697   // it could eliminate knowledge of undef elements created by this shuffle.
20698   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
20699     return ShufOp;
20700 
20701   // Match shuffles that can be converted to any_vector_extend_in_reg.
20702   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
20703     return V;
20704 
20705   // Combine "truncate_vector_in_reg" style shuffles.
20706   if (SDValue V = combineTruncationShuffle(SVN, DAG))
20707     return V;
20708 
20709   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
20710       Level < AfterLegalizeVectorOps &&
20711       (N1.isUndef() ||
20712       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
20713        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
20714     if (SDValue V = partitionShuffleOfConcats(N, DAG))
20715       return V;
20716   }
20717 
20718   // A shuffle of a concat of the same narrow vector can be reduced to use
20719   // only low-half elements of a concat with undef:
20720   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
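  // e.g. shuf (concat X, X), undef, <3,1,2,0>
  //        --> shuf (concat X, undef), undef, <1,1,0,0>, where X has 2 elts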
20721   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
20722       N0.getNumOperands() == 2 &&
20723       N0.getOperand(0) == N0.getOperand(1)) {
20724     int HalfNumElts = (int)NumElts / 2;
20725     SmallVector<int, 8> NewMask;
20726     for (unsigned i = 0; i != NumElts; ++i) {
20727       int Idx = SVN->getMaskElt(i);
20728       if (Idx >= HalfNumElts) {
20729         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
20730         Idx -= HalfNumElts;
20731       }
20732       NewMask.push_back(Idx);
20733     }
20734     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
20735       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
20736       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
20737                                    N0.getOperand(0), UndefVec);
20738       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
20739     }
20740   }
20741 
20742   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
20743   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
20744   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
20745     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
20746       return Res;
20747 
20748   // If this shuffle only has a single input that is a bitcasted shuffle,
20749   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
20750   // back to their original types.
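  // e.g. a v2i64 shuffle of (bitcast (v4i32 shuffle X, Y)) can be performed
  // as a single v4i32 shuffle by scaling the v2i64 mask to i32 elements and
  // composing it with the inner v4i32 mask.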
20751   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
20752       N1.isUndef() && Level < AfterLegalizeVectorOps &&
20753       TLI.isTypeLegal(VT)) {
20754 
20755     SDValue BC0 = peekThroughOneUseBitcasts(N0);
20756     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
20757       EVT SVT = VT.getScalarType();
20758       EVT InnerVT = BC0->getValueType(0);
20759       EVT InnerSVT = InnerVT.getScalarType();
20760 
20761       // Determine which shuffle works with the smaller scalar type.
20762       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
20763       EVT ScaleSVT = ScaleVT.getScalarType();
20764 
20765       if (TLI.isTypeLegal(ScaleVT) &&
20766           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
20767           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
20768         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20769         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
20770 
20771         // Scale the shuffle masks to the smaller scalar type.
20772         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
20773         SmallVector<int, 8> InnerMask;
20774         SmallVector<int, 8> OuterMask;
20775         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
20776         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
20777 
20778         // Merge the shuffle masks.
20779         SmallVector<int, 8> NewMask;
20780         for (int M : OuterMask)
20781           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
20782 
20783         // Test for shuffle mask legality over both commutations.
20784         SDValue SV0 = BC0->getOperand(0);
20785         SDValue SV1 = BC0->getOperand(1);
20786         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20787         if (!LegalMask) {
20788           std::swap(SV0, SV1);
20789           ShuffleVectorSDNode::commuteMask(NewMask);
20790           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
20791         }
20792 
20793         if (LegalMask) {
20794           SV0 = DAG.getBitcast(ScaleVT, SV0);
20795           SV1 = DAG.getBitcast(ScaleVT, SV1);
20796           return DAG.getBitcast(
20797               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
20798         }
20799       }
20800     }
20801   }
20802 
20803   // Canonicalize shuffles according to rules:
20804   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
20805   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
20806   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
20807   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
20808       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
20809       TLI.isTypeLegal(VT)) {
20810     // The incoming shuffle must be of the same type as the result of the
20811     // current shuffle.
20812     assert(N1->getOperand(0).getValueType() == VT &&
20813            "Shuffle types don't match");
20814 
20815     SDValue SV0 = N1->getOperand(0);
20816     SDValue SV1 = N1->getOperand(1);
20817     bool HasSameOp0 = N0 == SV0;
20818     bool IsSV1Undef = SV1.isUndef();
20819     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
20820       // Commute the operands of this shuffle so that next rule
20821       // will trigger.
20822       return DAG.getCommutedVectorShuffle(*SVN);
20823   }
20824 
20825   // Try to fold according to rules:
20826   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20827   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20828   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20829   // Don't try to fold shuffles with illegal type.
20830   // Only fold if this shuffle is the only user of the other shuffle.
20831   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
20832       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
20833     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
20834 
20835     // Don't try to fold splats; they're likely to simplify somehow, or they
20836     // might be free.
20837     if (OtherSV->isSplat())
20838       return SDValue();
20839 
20840     // The incoming shuffle must be of the same type as the result of the
20841     // current shuffle.
20842     assert(OtherSV->getOperand(0).getValueType() == VT &&
20843            "Shuffle types don't match");
20844 
20845     SDValue SV0, SV1;
20846     SmallVector<int, 4> Mask;
20847     // Compute the combined shuffle mask for a shuffle with SV0 as the first
20848     // operand, and SV1 as the second operand.
20849     for (unsigned i = 0; i != NumElts; ++i) {
20850       int Idx = SVN->getMaskElt(i);
20851       if (Idx < 0) {
20852         // Propagate Undef.
20853         Mask.push_back(Idx);
20854         continue;
20855       }
20856 
20857       SDValue CurrentVec;
20858       if (Idx < (int)NumElts) {
20859         // This shuffle index refers to the inner shuffle N0. Lookup the inner
20860         // shuffle mask to identify which vector is actually referenced.
20861         Idx = OtherSV->getMaskElt(Idx);
20862         if (Idx < 0) {
20863           // Propagate Undef.
20864           Mask.push_back(Idx);
20865           continue;
20866         }
20867 
20868         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
20869                                            : OtherSV->getOperand(1);
20870       } else {
20871         // This shuffle index references an element within N1.
20872         CurrentVec = N1;
20873       }
20874 
20875       // Simple case where 'CurrentVec' is UNDEF.
20876       if (CurrentVec.isUndef()) {
20877         Mask.push_back(-1);
20878         continue;
20879       }
20880 
20881       // Canonicalize the shuffle index. We don't know yet if CurrentVec
20882       // will be the first or second operand of the combined shuffle.
20883       Idx = Idx % NumElts;
20884       if (!SV0.getNode() || SV0 == CurrentVec) {
20885         // Ok. CurrentVec is the left hand side.
20886         // Update the mask accordingly.
20887         SV0 = CurrentVec;
20888         Mask.push_back(Idx);
20889         continue;
20890       }
20891 
20892       // Bail out if we cannot convert the shuffle pair into a single shuffle.
20893       if (SV1.getNode() && SV1 != CurrentVec)
20894         return SDValue();
20895 
20896       // Ok. CurrentVec is the right hand side.
20897       // Update the mask accordingly.
20898       SV1 = CurrentVec;
20899       Mask.push_back(Idx + NumElts);
20900     }
20901 
    // Check if all indices in Mask are undef. If so, propagate undef.
20903     bool isUndefMask = true;
20904     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
20905       isUndefMask &= Mask[i] < 0;
20906 
20907     if (isUndefMask)
20908       return DAG.getUNDEF(VT);
20909 
20910     if (!SV0.getNode())
20911       SV0 = DAG.getUNDEF(VT);
20912     if (!SV1.getNode())
20913       SV1 = DAG.getUNDEF(VT);
20914 
20915     // Avoid introducing shuffles with illegal mask.
20916     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
20917     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
20918     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
20919     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
20920     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
20921     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
20922     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
20923   }
20924 
20925   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
20926     return V;
20927 
20928   return SDValue();
20929 }
20930 
20931 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
20932   SDValue InVal = N->getOperand(0);
20933   EVT VT = N->getValueType(0);
20934 
20935   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
20936   // with a VECTOR_SHUFFLE and possible truncate.
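  // e.g. scalar_to_vector (extractelt V:v4i32, 2) --> shuffle V, undef,
  //      <2,u,u,u> when the result type already matches V's type.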
20937   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
20938       VT.isFixedLengthVector() &&
20939       InVal->getOperand(0).getValueType().isFixedLengthVector()) {
20940     SDValue InVec = InVal->getOperand(0);
20941     SDValue EltNo = InVal->getOperand(1);
20942     auto InVecT = InVec.getValueType();
20943     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
20944       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
20945       int Elt = C0->getZExtValue();
20946       NewMask[0] = Elt;
      // If we have an implicit truncate, do the truncate here as long as it's
      // legal; if it's not legal, fall through and try to form a shuffle below.
20949       if (VT.getScalarType() != InVal.getValueType() &&
20950           InVal.getValueType().isScalarInteger() &&
20951           isTypeLegal(VT.getScalarType())) {
20952         SDValue Val =
20953             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
20954         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
20955       }
20956       if (VT.getScalarType() == InVecT.getScalarType() &&
20957           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
20958         SDValue LegalShuffle =
20959           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
20960                                       DAG.getUNDEF(InVecT), NewMask, DAG);
20961         if (LegalShuffle) {
20962           // If the initial vector is the correct size this shuffle is a
20963           // valid result.
20964           if (VT == InVecT)
20965             return LegalShuffle;
20966           // If not we must truncate the vector.
20967           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
20968             SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
20969             EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
20970                                          InVecT.getVectorElementType(),
20971                                          VT.getVectorNumElements());
20972             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
20973                                LegalShuffle, ZeroIdx);
20974           }
20975         }
20976       }
20977     }
20978   }
20979 
20980   return SDValue();
20981 }
20982 
20983 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
20984   EVT VT = N->getValueType(0);
20985   SDValue N0 = N->getOperand(0);
20986   SDValue N1 = N->getOperand(1);
20987   SDValue N2 = N->getOperand(2);
20988   uint64_t InsIdx = N->getConstantOperandVal(2);
20989 
20990   // If inserting an UNDEF, just return the original vector.
20991   if (N1.isUndef())
20992     return N0;
20993 
20994   // If this is an insert of an extracted vector into an undef vector, we can
20995   // just use the input to the extract.
20996   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
20997       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
20998     return N1.getOperand(0);
20999 
21000   // If we are inserting a bitcast value into an undef, with the same
21001   // number of elements, just use the bitcast input of the extract.
  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X, N2)) N2 ->
  //        BITCAST X
21004   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
21005       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
21006       N1.getOperand(0).getOperand(1) == N2 &&
21007       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
21008           VT.getVectorNumElements() &&
21009       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
21010           VT.getSizeInBits()) {
21011     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
21012   }
21013 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
21016   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
21017   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
21018   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
21019     SDValue CN0 = N0.getOperand(0);
21020     SDValue CN1 = N1.getOperand(0);
21021     EVT CN0VT = CN0.getValueType();
21022     EVT CN1VT = CN1.getValueType();
21023     if (CN0VT.isVector() && CN1VT.isVector() &&
21024         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
21025         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
21026       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
21027                                       CN0.getValueType(), CN0, CN1, N2);
21028       return DAG.getBitcast(VT, NewINSERT);
21029     }
21030   }
21031 
21032   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
21033   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
21034   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
21035   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
21036       N0.getOperand(1).getValueType() == N1.getValueType() &&
21037       N0.getOperand(2) == N2)
21038     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
21039                        N1, N2);
21040 
21041   // Eliminate an intermediate insert into an undef vector:
21042   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
21043   // insert_subvector undef, X, N2
21044   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
21045       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
21046     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
21047                        N1.getOperand(1), N2);
21048 
21049   // Push subvector bitcasts to the output, adjusting the index as we go.
21050   // insert_subvector(bitcast(v), bitcast(s), c1)
21051   // -> bitcast(insert_subvector(v, s, c2))
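  // e.g. insert_subvector (bitcast V:v8i32):v4i64, (bitcast S:v4i32):v2i64, 2
  //        --> bitcast (insert_subvector V, S, 4):v4i64
  // where the index is scaled by the element-size ratio (one i64 holds two
  // i32s).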
21052   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
21053       N1.getOpcode() == ISD::BITCAST) {
21054     SDValue N0Src = peekThroughBitcasts(N0);
21055     SDValue N1Src = peekThroughBitcasts(N1);
21056     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
21057     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
21058     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
21059         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
21060       EVT NewVT;
21061       SDLoc DL(N);
21062       SDValue NewIdx;
21063       LLVMContext &Ctx = *DAG.getContext();
21064       ElementCount NumElts = VT.getVectorElementCount();
21065       unsigned EltSizeInBits = VT.getScalarSizeInBits();
21066       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
21067         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
21068         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
21069         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
21070       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
21071         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
21072         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
21073           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
21074                                    NumElts.divideCoefficientBy(Scale));
21075           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
21076         }
21077       }
21078       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
21079         SDValue Res = DAG.getBitcast(NewVT, N0Src);
21080         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
21081         return DAG.getBitcast(VT, Res);
21082       }
21083     }
21084   }
21085 
21086   // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
  // when Idx1 < Idx0.
21090   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
21091       N1.getValueType() == N0.getOperand(1).getValueType()) {
21092     unsigned OtherIdx = N0.getConstantOperandVal(2);
21093     if (InsIdx < OtherIdx) {
21094       // Swap nodes.
21095       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
21096                                   N0.getOperand(0), N1, N2);
21097       AddToWorklist(NewOp.getNode());
21098       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
21099                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
21100     }
21101   }
21102 
21103   // If the input vector is a concatenation, and the insert replaces
21104   // one of the pieces, we can optimize into a single concat_vectors.
21105   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
21106       N0.getOperand(0).getValueType() == N1.getValueType()) {
21107     unsigned Factor = N1.getValueType().getVectorNumElements();
21108     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
21109     Ops[InsIdx / Factor] = N1;
21110     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
21111   }
21112 
21113   // Simplify source operands based on insertion.
21114   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
21115     return SDValue(N, 0);
21116 
21117   return SDValue();
21118 }
21119 
21120 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
21121   SDValue N0 = N->getOperand(0);
21122 
21123   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
21124   if (N0->getOpcode() == ISD::FP16_TO_FP)
21125     return N0->getOperand(0);
21126 
21127   return SDValue();
21128 }
21129 
21130 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
21131   SDValue N0 = N->getOperand(0);
21132 
21133   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
21134   if (N0->getOpcode() == ISD::AND) {
21135     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
21136     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
21137       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
21138                          N0.getOperand(0));
21139     }
21140   }
21141 
21142   return SDValue();
21143 }
21144 
21145 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
21146   SDValue N0 = N->getOperand(0);
21147   EVT VT = N0.getValueType();
21148   unsigned Opcode = N->getOpcode();
21149 
21150   // VECREDUCE over 1-element vector is just an extract.
21151   if (VT.getVectorElementCount().isScalar()) {
21152     SDLoc dl(N);
21153     SDValue Res =
21154         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
21155                     DAG.getVectorIdxConstant(0, dl));
21156     if (Res.getValueType() != N->getValueType(0))
21157       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
21158     return Res;
21159   }
21160 
  // On a boolean vector an and/or reduction is the same as a umin/umax
21162   // reduction. Convert them if the latter is legal while the former isn't.
21163   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
21164     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
21165         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
21166     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
21167         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
21168         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
21169       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
21170   }
21171 
21172   return SDValue();
21173 }
21174 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
21177 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
21178 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
21179 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
21180   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
21181 
21182   EVT VT = N->getValueType(0);
21183   SDValue LHS = N->getOperand(0);
21184   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
21185   SDLoc DL(N);
21186 
21187   // Make sure we're not running after operation legalization where it
21188   // may have custom lowered the vector shuffles.
21189   if (LegalOperations)
21190     return SDValue();
21191 
21192   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
21193     return SDValue();
21194 
21195   EVT RVT = RHS.getValueType();
21196   unsigned NumElts = RHS.getNumOperands();
21197 
  // Attempt to create a valid clear mask by splitting the mask into
  // sub-elements and checking that each is either all zeros or all ones,
  // making it suitable for shuffle masking.
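  // e.g. (little endian) a v2i32 mask of <0x0000ffff, 0xffff0000> splits at
  // 16 bits into the v4i16 clear-shuffle mask <0,5,6,3>, where indices >= 4
  // select elements from the zero vector.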
21201   auto BuildClearMask = [&](int Split) {
21202     int NumSubElts = NumElts * Split;
21203     int NumSubBits = RVT.getScalarSizeInBits() / Split;
21204 
21205     SmallVector<int, 8> Indices;
21206     for (int i = 0; i != NumSubElts; ++i) {
21207       int EltIdx = i / Split;
21208       int SubIdx = i % Split;
21209       SDValue Elt = RHS.getOperand(EltIdx);
21210       // X & undef --> 0 (not undef). So this lane must be converted to choose
21211       // from the zero constant vector (same as if the element had all 0-bits).
21212       if (Elt.isUndef()) {
21213         Indices.push_back(i + NumSubElts);
21214         continue;
21215       }
21216 
21217       APInt Bits;
21218       if (isa<ConstantSDNode>(Elt))
21219         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
21220       else if (isa<ConstantFPSDNode>(Elt))
21221         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
21222       else
21223         return SDValue();
21224 
21225       // Extract the sub element from the constant bit mask.
21226       if (DAG.getDataLayout().isBigEndian())
21227         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
21228       else
21229         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
21230 
21231       if (Bits.isAllOnesValue())
21232         Indices.push_back(i);
21233       else if (Bits == 0)
21234         Indices.push_back(i + NumSubElts);
21235       else
21236         return SDValue();
21237     }
21238 
21239     // Let's see if the target supports this vector_shuffle.
21240     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
21241     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
21242     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
21243       return SDValue();
21244 
21245     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
21246     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
21247                                                    DAG.getBitcast(ClearVT, LHS),
21248                                                    Zero, Indices));
21249   };
21250 
21251   // Determine maximum split level (byte level masking).
21252   int MaxSplit = 1;
21253   if (RVT.getScalarSizeInBits() % 8 == 0)
21254     MaxSplit = RVT.getScalarSizeInBits() / 8;
21255 
21256   for (int Split = 1; Split <= MaxSplit; ++Split)
21257     if (RVT.getScalarSizeInBits() % Split == 0)
21258       if (SDValue S = BuildClearMask(Split))
21259         return S;
21260 
21261   return SDValue();
21262 }
21263 
21264 /// If a vector binop is performed on splat values, it may be profitable to
21265 /// extract, scalarize, and insert/splat.
21266 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
21267   SDValue N0 = N->getOperand(0);
21268   SDValue N1 = N->getOperand(1);
21269   unsigned Opcode = N->getOpcode();
21270   EVT VT = N->getValueType(0);
21271   EVT EltVT = VT.getVectorElementType();
21272   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21273 
21274   // TODO: Remove/replace the extract cost check? If the elements are available
21275   //       as scalars, then there may be no extract cost. Should we ask if
21276   //       inserting a scalar back into a vector is cheap instead?
21277   int Index0, Index1;
21278   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
21279   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
21280   if (!Src0 || !Src1 || Index0 != Index1 ||
21281       Src0.getValueType().getVectorElementType() != EltVT ||
21282       Src1.getValueType().getVectorElementType() != EltVT ||
21283       !TLI.isExtractVecEltCheap(VT, Index0) ||
21284       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
21285     return SDValue();
21286 
21287   SDLoc DL(N);
21288   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
21289   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
21290   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
21291   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
21292 
21293   // If all lanes but 1 are undefined, no need to splat the scalar result.
21294   // TODO: Keep track of undefs and use that info in the general case.
21295   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
21296       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
21297       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
21298     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
21299     // build_vec ..undef, (bo X, Y), undef...
21300     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
21301     Ops[Index0] = ScalarBO;
21302     return DAG.getBuildVector(VT, DL, Ops);
21303   }
21304 
21305   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
21306   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
21307   return DAG.getBuildVector(VT, DL, Ops);
21308 }
21309 
21310 /// Visit a binary vector operation, like ADD.
21311 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
21312   assert(N->getValueType(0).isVector() &&
21313          "SimplifyVBinOp only works on vectors!");
21314 
21315   SDValue LHS = N->getOperand(0);
21316   SDValue RHS = N->getOperand(1);
21317   SDValue Ops[] = {LHS, RHS};
21318   EVT VT = N->getValueType(0);
21319   unsigned Opcode = N->getOpcode();
21320   SDNodeFlags Flags = N->getFlags();
21321 
21322   // See if we can constant fold the vector operation.
21323   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
21324           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
21325     return Fold;
21326 
21327   // Move unary shuffles with identical masks after a vector binop:
21328   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
21329   //   --> shuffle (VBinOp A, B), Undef, Mask
21330   // This does not require type legality checks because we are creating the
21331   // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
21333   // though. This code is adapted from the identical transform in instcombine.
21334   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
21335       Opcode != ISD::UREM && Opcode != ISD::SREM &&
21336       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
21337     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
21338     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
21339     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
21340         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
21341         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
21342       SDLoc DL(N);
21343       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
21344                                      RHS.getOperand(0), Flags);
21345       SDValue UndefV = LHS.getOperand(1);
21346       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
21347     }
21348 
21349     // Try to sink a splat shuffle after a binop with a uniform constant.
21350     // This is limited to cases where neither the shuffle nor the constant have
21351     // undefined elements because that could be poison-unsafe or inhibit
21352     // demanded elements analysis. It is further limited to not change a splat
21353     // of an inserted scalar because that may be optimized better by
21354     // load-folding or other target-specific behaviors.
21355     if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) &&
21356         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
21357         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21358       // binop (splat X), (splat C) --> splat (binop X, C)
21359       SDLoc DL(N);
21360       SDValue X = Shuf0->getOperand(0);
21361       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
21362       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21363                                   Shuf0->getMask());
21364     }
21365     if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) &&
21366         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
21367         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
21368       // binop (splat C), (splat X) --> splat (binop C, X)
21369       SDLoc DL(N);
21370       SDValue X = Shuf1->getOperand(0);
21371       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
21372       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
21373                                   Shuf1->getMask());
21374     }
21375   }
21376 
21377   // The following pattern is likely to emerge with vector reduction ops. Moving
21378   // the binary operation ahead of insertion may allow using a narrower vector
21379   // instruction that has better performance than the wide version of the op:
21380   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
21381   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
21382       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
21383       LHS.getOperand(2) == RHS.getOperand(2) &&
21384       (LHS.hasOneUse() || RHS.hasOneUse())) {
21385     SDValue X = LHS.getOperand(1);
21386     SDValue Y = RHS.getOperand(1);
21387     SDValue Z = LHS.getOperand(2);
21388     EVT NarrowVT = X.getValueType();
21389     if (NarrowVT == Y.getValueType() &&
21390         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
21391                                               LegalOperations)) {
21392       // (binop undef, undef) may not return undef, so compute that result.
21393       SDLoc DL(N);
21394       SDValue VecC =
21395           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
21396       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
21397       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
21398     }
21399   }
21400 
21401   // Make sure all but the first op are undef or constant.
21402   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
21403     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
21404            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
21405                      [](const SDValue &Op) {
21406                        return Op.isUndef() ||
21407                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
21408                      });
21409   };
21410 
21411   // The following pattern is likely to emerge with vector reduction ops. Moving
21412   // the binary operation ahead of the concat may allow using a narrower vector
21413   // instruction that has better performance than the wide version of the op:
21414   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
21415   //   concat (VBinOp X, Y), VecC
21416   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
21417       (LHS.hasOneUse() || RHS.hasOneUse())) {
21418     EVT NarrowVT = LHS.getOperand(0).getValueType();
21419     if (NarrowVT == RHS.getOperand(0).getValueType() &&
21420         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
21421       SDLoc DL(N);
21422       unsigned NumOperands = LHS.getNumOperands();
21423       SmallVector<SDValue, 4> ConcatOps;
21424       for (unsigned i = 0; i != NumOperands; ++i) {
        // This will constant fold for operands 1 and up, since those are
        // undef or constant on both sides.
21426         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
21427                                         RHS.getOperand(i)));
21428       }
21429 
21430       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
21431     }
21432   }
21433 
21434   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
21435     return V;
21436 
21437   return SDValue();
21438 }
21439 
21440 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
21441                                     SDValue N2) {
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");
21443 
21444   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
21445                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
21446 
21447   // If we got a simplified select_cc node back from SimplifySelectCC, then
21448   // break it down into a new SETCC node, and a new SELECT node, and then return
21449   // the SELECT node, since we were called with a SELECT node.
21450   if (SCC.getNode()) {
21451     // Check to see if we got a select_cc back (to turn into setcc/select).
21452     // Otherwise, just return whatever node we got back, like fabs.
21453     if (SCC.getOpcode() == ISD::SELECT_CC) {
21454       const SDNodeFlags Flags = N0.getNode()->getFlags();
21455       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
21456                                   N0.getValueType(),
21457                                   SCC.getOperand(0), SCC.getOperand(1),
21458                                   SCC.getOperand(4), Flags);
21459       AddToWorklist(SETCC.getNode());
21460       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
21461                                          SCC.getOperand(2), SCC.getOperand(3));
21462       SelectNode->setFlags(Flags);
21463       return SelectNode;
21464     }
21465 
21466     return SCC;
21467   }
21468   return SDValue();
21469 }
21470 
21471 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
21472 /// being selected between, see if we can simplify the select.  Callers of this
21473 /// should assume that TheSelect is deleted if this returns true.  As such, they
21474 /// should return the appropriate thing (e.g. the node) back to the top-level of
21475 /// the DAG combiner loop to avoid it being looked at.
21476 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
21477                                     SDValue RHS) {
21478   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21479   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
21480   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
21481     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
21482       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
21483       SDValue Sqrt = RHS;
21484       ISD::CondCode CC;
21485       SDValue CmpLHS;
21486       const ConstantFPSDNode *Zero = nullptr;
21487 
21488       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
21489         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
21490         CmpLHS = TheSelect->getOperand(0);
21491         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
21492       } else {
21493         // SELECT or VSELECT
21494         SDValue Cmp = TheSelect->getOperand(0);
21495         if (Cmp.getOpcode() == ISD::SETCC) {
21496           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
21497           CmpLHS = Cmp.getOperand(0);
21498           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
21499         }
21500       }
21501       if (Zero && Zero->isZero() &&
21502           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
21503           CC == ISD::SETULT || CC == ISD::SETLT)) {
21504         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
21505         CombineTo(TheSelect, Sqrt);
21506         return true;
21507       }
21508     }
21509   }
  // Cannot simplify select with vector condition.
  if (TheSelect->getOperand(0).getValueType().isVector())
    return false;
21512 
21513   // If this is a select from two identical things, try to pull the operation
21514   // through the select.
21515   if (LHS.getOpcode() != RHS.getOpcode() ||
21516       !LHS.hasOneUse() || !RHS.hasOneUse())
21517     return false;
21518 
21519   // If this is a load and the token chain is identical, replace the select
21520   // of two loads with a load through a select of the address to load from.
21521   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
21522   // constants have been dropped into the constant pool.
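  // e.g. (select C, (load A), (load B)) --> (load (select C, A, B)), so only
  // one load and one address computation remain.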
21523   if (LHS.getOpcode() == ISD::LOAD) {
21524     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
21525     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
21526 
21527     // Token chains must be identical.
21528     if (LHS.getOperand(0) != RHS.getOperand(0) ||
21529         // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment.
21531         // TODO: This does appear to be legal for unordered atomics (see D66309)
21532         !LLD->isSimple() || !RLD->isSimple() ||
21533         // FIXME: If either is a pre/post inc/dec load,
21534         // we'd need to split out the address adjustment.
21535         LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VTs must match.
21537         LLD->getMemoryVT() != RLD->getMemoryVT() ||
21538         // If this is an EXTLOAD, the kind of extension must match.
21539         (LLD->getExtensionType() != RLD->getExtensionType() &&
21540          // The only exception is if one of the extensions is anyext.
21541          LLD->getExtensionType() != ISD::EXTLOAD &&
21542          RLD->getExtensionType() != ISD::EXTLOAD) ||
21543         // FIXME: this discards src value information.  This is
21544         // over-conservative. It would be beneficial to be able to remember
21545         // both potential memory locations.  Since we are discarding
21546         // src value info, don't do the transformation if the memory
21547         // locations are not in the default address space.
21548         LLD->getPointerInfo().getAddrSpace() != 0 ||
21549         RLD->getPointerInfo().getAddrSpace() != 0 ||
21550         // We can't produce a CMOV of a TargetFrameIndex since we won't
21551         // generate the address generation required.
21552         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21553         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
21554         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
21555                                       LLD->getBasePtr().getValueType()))
21556       return false;
21557 
21558     // The loads must not depend on one another.
21559     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
21560       return false;
21561 
21562     // Check that the select condition doesn't reach either load.  If so,
21563     // folding this will induce a cycle into the DAG.  If not, this is safe to
21564     // xform, so create a select of the addresses.
21565 
21566     SmallPtrSet<const SDNode *, 32> Visited;
21567     SmallVector<const SDNode *, 16> Worklist;
21568 
    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all nodes in question, so we need not search past it.
21571 
21572     Visited.insert(TheSelect);
21573     Worklist.push_back(LLD);
21574     Worklist.push_back(RLD);
21575 
21576     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
21577         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
21578       return false;
21579 
21580     SDValue Addr;
21581     if (TheSelect->getOpcode() == ISD::SELECT) {
21582       // We cannot do this optimization if any pair of {RLD, LLD} is a
21583       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
21584       // Loads, we only need to check if CondNode is a successor to one of the
21585       // loads. We can further avoid this if there's no use of their chain
21586       // value.
21587       SDNode *CondNode = TheSelect->getOperand(0).getNode();
21588       Worklist.push_back(CondNode);
21589 
21590       if ((LLD->hasAnyUseOfValue(1) &&
21591            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21592           (RLD->hasAnyUseOfValue(1) &&
21593            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21594         return false;
21595 
21596       Addr = DAG.getSelect(SDLoc(TheSelect),
21597                            LLD->getBasePtr().getValueType(),
21598                            TheSelect->getOperand(0), LLD->getBasePtr(),
21599                            RLD->getBasePtr());
21600     } else {  // Otherwise SELECT_CC
21601       // We cannot do this optimization if any pair of {RLD, LLD} is a
21602       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
21603       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
21604       // one of the loads. We can further avoid this if there's no use of their
21605       // chain value.
21606 
21607       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
21608       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
21609       Worklist.push_back(CondLHS);
21610       Worklist.push_back(CondRHS);
21611 
21612       if ((LLD->hasAnyUseOfValue(1) &&
21613            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
21614           (RLD->hasAnyUseOfValue(1) &&
21615            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
21616         return false;
21617 
21618       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
21619                          LLD->getBasePtr().getValueType(),
21620                          TheSelect->getOperand(0),
21621                          TheSelect->getOperand(1),
21622                          LLD->getBasePtr(), RLD->getBasePtr(),
21623                          TheSelect->getOperand(4));
21624     }
21625 
21626     SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must carry the minimum alignment of the inputs, since
    // that is the only guarantee that holds for both original accesses.
21630     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
21631     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
21632     if (!RLD->isInvariant())
21633       MMOFlags &= ~MachineMemOperand::MOInvariant;
21634     if (!RLD->isDereferenceable())
21635       MMOFlags &= ~MachineMemOperand::MODereferenceable;
21636     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
21637       // FIXME: Discards pointer and AA info.
21638       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
21639                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
21640                          MMOFlags);
21641     } else {
21642       // FIXME: Discards pointer and AA info.
21643       Load = DAG.getExtLoad(
21644           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
21645                                                   : LLD->getExtensionType(),
21646           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
21647           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
21648     }
21649 
21650     // Users of the select now use the result of the load.
21651     CombineTo(TheSelect, Load);
21652 
21653     // Users of the old loads now use the new load's chain.  We know the
21654     // old-load value is dead now.
21655     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
21656     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
21657     return true;
21658   }
21659 
21660   return false;
21661 }
21662 
21663 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
21664 /// bitwise 'and'.
21665 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
21666                                             SDValue N1, SDValue N2, SDValue N3,
21667                                             ISD::CondCode CC) {
21668   // If this is a select where the false operand is zero and the compare is a
21669   // check of the sign bit, see if we can perform the "gzip trick":
21670   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
21671   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
21672   EVT XType = N0.getValueType();
21673   EVT AType = N2.getValueType();
21674   if (!isNullConstant(N3) || !XType.bitsGE(AType))
21675     return SDValue();
21676 
21677   // If the comparison is testing for a positive value, we have to invert
21678   // the sign bit mask, so only do that transform if the target has a bitwise
21679   // 'and not' instruction (the invert is free).
21680   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
21681     // (X > -1) ? A : 0
21682     // (X >  0) ? X : 0 <-- This is canonical signed max.
21683     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
21684       return SDValue();
21685   } else if (CC == ISD::SETLT) {
21686     // (X <  0) ? A : 0
21687     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
21688     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
21689       return SDValue();
21690   } else {
21691     return SDValue();
21692   }
21693 
21694   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
21695   // constant.
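  // For example, for i32 X and A == 4: ShCt == 32 - log2(4) - 1 == 29, so
  // the fold produces (and (srl X, 29), 4); the sign bit of X lands exactly
  // on bit 2, yielding 4 when X is negative and 0 otherwise.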
21696   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
21697   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21698   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
21699     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
21700     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
21701       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21702       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
21703       AddToWorklist(Shift.getNode());
21704 
21705       if (XType.bitsGT(AType)) {
21706         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21707         AddToWorklist(Shift.getNode());
21708       }
21709 
21710       if (CC == ISD::SETGT)
21711         Shift = DAG.getNOT(DL, Shift, AType);
21712 
21713       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21714     }
21715   }
21716 
21717   unsigned ShCt = XType.getSizeInBits() - 1;
21718   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
21719     return SDValue();
21720 
21721   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
21722   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
21723   AddToWorklist(Shift.getNode());
21724 
21725   if (XType.bitsGT(AType)) {
21726     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
21727     AddToWorklist(Shift.getNode());
21728   }
21729 
21730   if (CC == ISD::SETGT)
21731     Shift = DAG.getNOT(DL, Shift, AType);
21732 
21733   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
21734 }
21735 
21736 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
21737 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
21738   SDValue N0 = N->getOperand(0);
21739   EVT VT = N->getValueType(0);
21740   bool IsFabs = N->getOpcode() == ISD::FABS;
21741   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
21742 
21743   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
21744     return SDValue();
21745 
21746   SDValue Int = N0.getOperand(0);
21747   EVT IntVT = Int.getValueType();
21748 
  // The bitcast operand must be a scalar integer type.
21750   if (!IntVT.isInteger() || IntVT.isVector())
21751     return SDValue();
21752 
21753   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
21754   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
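  // For example, for a scalar f32 bitcast from i32, fneg becomes
  // (xor x, 0x80000000) and fabs becomes (and x, 0x7FFFFFFF).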
21755   APInt SignMask;
21756   if (N0.getValueType().isVector()) {
21757     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
21758     // 0x7f...) per element and splat it.
21759     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
21760     if (IsFabs)
21761       SignMask = ~SignMask;
21762     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
21763   } else {
21764     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
21765     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
21766     if (IsFabs)
21767       SignMask = ~SignMask;
21768   }
21769   SDLoc DL(N0);
21770   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
21771                     DAG.getConstant(SignMask, DL, IntVT));
21772   AddToWorklist(Int.getNode());
21773   return DAG.getBitcast(VT, Int);
21774 }
21775 
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
21777 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
21778 /// in it. This may be a win when the constant is not otherwise available
21779 /// because it replaces two constant pool loads with one.
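/// Note that the implementation below stores the false value at index 0 and
/// the true value at index 1 of the array, so the selected offset is EltSize
/// when the condition is true and 0 when it is false.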
21780 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
21781     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
21782     ISD::CondCode CC) {
21783   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
21784     return SDValue();
21785 
21786   // If we are before legalize types, we want the other legalization to happen
21787   // first (for example, to avoid messing with soft float).
21788   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
21789   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
21790   EVT VT = N2.getValueType();
21791   if (!TV || !FV || !TLI.isTypeLegal(VT))
21792     return SDValue();
21793 
21794   // If a constant can be materialized without loads, this does not make sense.
21795   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
21796       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
21797       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
21798     return SDValue();
21799 
21800   // If both constants have multiple uses, then we won't need to do an extra
21801   // load. The values are likely around in registers for other users.
21802   if (!TV->hasOneUse() && !FV->hasOneUse())
21803     return SDValue();
21804 
21805   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
21806                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
21807   Type *FPTy = Elts[0]->getType();
21808   const DataLayout &TD = DAG.getDataLayout();
21809 
21810   // Create a ConstantArray of the two constants.
21811   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
21812   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
21813                                       TD.getPrefTypeAlign(FPTy));
21814   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
21815 
21816   // Get offsets to the 0 and 1 elements of the array, so we can select between
21817   // them.
21818   SDValue Zero = DAG.getIntPtrConstant(0, DL);
21819   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
21820   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
21821   SDValue Cond =
21822       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
21823   AddToWorklist(Cond.getNode());
21824   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
21825   AddToWorklist(CstOffset.getNode());
21826   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
21827   AddToWorklist(CPIdx.getNode());
21828   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
21829                      MachinePointerInfo::getConstantPool(
21830                          DAG.getMachineFunction()), Alignment);
21831 }
21832 
21833 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
21834 /// where 'cond' is the comparison specified by CC.
21835 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
21836                                       SDValue N2, SDValue N3, ISD::CondCode CC,
21837                                       bool NotExtCompare) {
21838   // (x ? y : y) -> y.
21839   if (N2 == N3) return N2;
21840 
21841   EVT CmpOpVT = N0.getValueType();
21842   EVT CmpResVT = getSetCCResultType(CmpOpVT);
21843   EVT VT = N2.getValueType();
21844   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
21845   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
21846   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
21847 
21848   // Determine if the condition we're dealing with is constant.
21849   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
21850     AddToWorklist(SCC.getNode());
21851     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
21852       // fold select_cc true, x, y -> x
21853       // fold select_cc false, x, y -> y
21854       return !(SCCC->isNullValue()) ? N2 : N3;
21855     }
21856   }
21857 
21858   if (SDValue V =
21859           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
21860     return V;
21861 
21862   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
21863     return V;
21864 
21865   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // In plain terms: we can turn the SELECT_CC into an AND when the condition
  // can be materialized as an all-ones register.  Any single bit-test can be
  // materialized as an all-ones register with shift-left and
  // shift-right-arith.
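  // For example, for (select_cc seteq (and x, 8), 0, 0, A) with i32 x:
  // shifting x left by 28 moves bit 3 into the sign bit, and the arithmetic
  // shift right by 31 then produces all-ones exactly when bit 3 was set, so
  // the AND with A yields A iff (x & 8) != 0.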
21871   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
21872       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
21873     SDValue AndLHS = N0->getOperand(0);
21874     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
21875     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
21876       // Shift the tested bit over the sign bit.
21877       const APInt &AndMask = ConstAndRHS->getAPIntValue();
21878       unsigned ShCt = AndMask.getBitWidth() - 1;
21879       if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
21880         SDValue ShlAmt =
21881           DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
21882                           getShiftAmountTy(AndLHS.getValueType()));
21883         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
21884 
21885         // Now arithmetic right shift it all the way over, so the result is
21886         // either all-ones, or zero.
21887         SDValue ShrAmt =
21888           DAG.getConstant(ShCt, SDLoc(Shl),
21889                           getShiftAmountTy(Shl.getValueType()));
21890         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
21891 
21892         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
21893       }
21894     }
21895   }
21896 
21897   // fold select C, 16, 0 -> shl C, 4
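  // (The setcc result is first zero-extended to {0, 1}; shifting that left
  // by log2(16) == 4 then yields {0, 16}.)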
21898   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
21899   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
21900 
21901   if ((Fold || Swap) &&
21902       TLI.getBooleanContents(CmpOpVT) ==
21903           TargetLowering::ZeroOrOneBooleanContent &&
21904       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
21905 
21906     if (Swap) {
21907       CC = ISD::getSetCCInverse(CC, CmpOpVT);
21908       std::swap(N2C, N3C);
21909     }
21910 
21911     // If the caller doesn't want us to simplify this into a zext of a compare,
21912     // don't do it.
21913     if (NotExtCompare && N2C->isOne())
21914       return SDValue();
21915 
21916     SDValue Temp, SCC;
21917     // zext (setcc n0, n1)
21918     if (LegalTypes) {
21919       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
21920       if (VT.bitsLT(SCC.getValueType()))
21921         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
21922       else
21923         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21924     } else {
21925       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
21926       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
21927     }
21928 
21929     AddToWorklist(SCC.getNode());
21930     AddToWorklist(Temp.getNode());
21931 
21932     if (N2C->isOne())
21933       return Temp;
21934 
21935     unsigned ShCt = N2C->getAPIntValue().logBase2();
21936     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
21937       return SDValue();
21938 
21939     // shl setcc result by log2 n2c
21940     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
21941                        DAG.getConstant(ShCt, SDLoc(Temp),
21942                                        getShiftAmountTy(Temp.getValueType())));
21943   }
21944 
21945   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
21946   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
21947   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
21948   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
21949   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
21950   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
21951   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
21952   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
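  // These folds are sound because ISD::CTLZ and ISD::CTTZ are defined to
  // return the operand's bit width for a zero input, which is exactly the
  // value the select supplies, while the _ZERO_UNDEF variants are undefined
  // there and may be strengthened to the defined forms.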
21953   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
21954     SDValue ValueOnZero = N2;
21955     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
21957     if (CC == ISD::SETNE)
21958       std::swap(ValueOnZero, Count);
21959     // Check if the value on zero is a constant equal to the bits in the type.
21960     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
21961       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
21962         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
21963         // legal, combine to just cttz.
21964         if ((Count.getOpcode() == ISD::CTTZ ||
21965              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
21966             N0 == Count.getOperand(0) &&
21967             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
21968           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
21969         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
21970         // legal, combine to just ctlz.
21971         if ((Count.getOpcode() == ISD::CTLZ ||
21972              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
21973             N0 == Count.getOperand(0) &&
21974             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
21975           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
21976       }
21977     }
21978   }
21979 
21980   return SDValue();
21981 }
21982 
21983 /// This is a stub for TargetLowering::SimplifySetCC.
21984 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
21985                                    ISD::CondCode Cond, const SDLoc &DL,
21986                                    bool foldBooleans) {
21987   TargetLowering::DAGCombinerInfo
21988     DagCombineInfo(DAG, Level, false, this);
21989   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
21990 }
21991 
21992 /// Given an ISD::SDIV node expressing a divide by constant, return
21993 /// a DAG expression to select that will generate the same value by multiplying
21994 /// by a magic number.
21995 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
21996 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
21999   if (DAG.getMachineFunction().getFunction().hasMinSize())
22000     return SDValue();
22001 
22002   SmallVector<SDNode *, 8> Built;
22003   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
22004     for (SDNode *N : Built)
22005       AddToWorklist(N);
22006     return S;
22007   }
22008 
22009   return SDValue();
22010 }
22011 
22012 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
22013 /// DAG expression that will generate the same value by right shifting.
22014 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
22015   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
22016   if (!C)
22017     return SDValue();
22018 
22019   // Avoid division by zero.
22020   if (C->isNullValue())
22021     return SDValue();
22022 
22023   SmallVector<SDNode *, 8> Built;
22024   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
22025     for (SDNode *N : Built)
22026       AddToWorklist(N);
22027     return S;
22028   }
22029 
22030   return SDValue();
22031 }
22032 
22033 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
22034 /// expression that will generate the same value by multiplying by a magic
22035 /// number.
22036 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
22037 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div to a mul
  // and a shift.
22040   if (DAG.getMachineFunction().getFunction().hasMinSize())
22041     return SDValue();
22042 
22043   SmallVector<SDNode *, 8> Built;
22044   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
22045     for (SDNode *N : Built)
22046       AddToWorklist(N);
22047     return S;
22048   }
22049 
22050   return SDValue();
22051 }
22052 
22053 /// Determines the LogBase2 value for a non-null input value using the
22054 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
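/// For example, for V == 16 in i32: ctlz(16) == 27, so
/// LogBase2(16) == 31 - 27 == 4.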
22055 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
22056   EVT VT = V.getValueType();
22057   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
22058   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
22059   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
22060   return LogBase2;
22061 }
22062 
22063 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22064 /// For the reciprocal, we need to find the zero of the function:
22065 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
22066 ///     =>
22067 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
22068 ///     does not require additional intermediate precision]
22069 /// For the last iteration, put numerator N into it to gain more precision:
22070 ///   Result = N X_i + X_i (N - N A X_i)
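/// which factors as N X_i (2 - A X_i) and therefore converges to N/A, the
/// quotient, rather than refining 1/A and multiplying afterwards.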
22071 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
22072                                       SDNodeFlags Flags) {
22073   if (LegalDAG)
22074     return SDValue();
22075 
22076   // TODO: Handle half and/or extended types?
22077   EVT VT = Op.getValueType();
22078   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22079     return SDValue();
22080 
22081   // If estimates are explicitly disabled for this function, we're done.
22082   MachineFunction &MF = DAG.getMachineFunction();
22083   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
22084   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22085     return SDValue();
22086 
22087   // Estimates may be explicitly enabled for this type with a custom number of
22088   // refinement steps.
22089   int Iterations = TLI.getDivRefinementSteps(VT, MF);
22090   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
22091     AddToWorklist(Est.getNode());
22092 
22093     SDLoc DL(Op);
22094     if (Iterations) {
22095       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
22096 
22097       // Newton iterations: Est = Est + Est (N - Arg * Est)
22098       // If this is the last iteration, also multiply by the numerator.
22099       for (int i = 0; i < Iterations; ++i) {
22100         SDValue MulEst = Est;
22101 
22102         if (i == Iterations - 1) {
22103           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
22104           AddToWorklist(MulEst.getNode());
22105         }
22106 
22107         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
22108         AddToWorklist(NewEst.getNode());
22109 
22110         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
22111                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
22112         AddToWorklist(NewEst.getNode());
22113 
22114         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22115         AddToWorklist(NewEst.getNode());
22116 
22117         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
22118         AddToWorklist(Est.getNode());
22119       }
22120     } else {
22121       // If no iterations are available, multiply with N.
22122       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
22123       AddToWorklist(Est.getNode());
22124     }
22125 
22126     return Est;
22127   }
22128 
22129   return SDValue();
22130 }
22131 
22132 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22133 /// For the reciprocal sqrt, we need to find the zero of the function:
22134 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22135 ///     =>
22136 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
22137 /// As a result, we precompute A/2 prior to the iteration loop.
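/// For example, A == 4 with the exact estimate X_0 == 0.5 is a fixed point:
/// X_1 = 0.5 * (1.5 - 2.0 * 0.25) = 0.5 == 1/sqrt(4).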
22138 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
22139                                          unsigned Iterations,
22140                                          SDNodeFlags Flags, bool Reciprocal) {
22141   EVT VT = Arg.getValueType();
22142   SDLoc DL(Arg);
22143   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
22144 
22145   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
22146   // this entire sequence requires only one FP constant.
22147   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
22148   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
22149 
22150   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
22151   for (unsigned i = 0; i < Iterations; ++i) {
22152     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
22153     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
22154     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
22155     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
22156   }
22157 
22158   // If non-reciprocal square root is requested, multiply the result by Arg.
22159   if (!Reciprocal)
22160     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
22161 
22162   return Est;
22163 }
22164 
22165 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
22166 /// For the reciprocal sqrt, we need to find the zero of the function:
22167 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
22168 ///     =>
22169 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
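/// This is algebraically the same iteration as the one-constant form above,
/// since (-0.5 X)(A X^2 - 3) = X (1.5 - 0.5 A X^2), but it trades the
/// precomputed A/2 for the two constants -0.5 and -3.0.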
22170 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
22171                                          unsigned Iterations,
22172                                          SDNodeFlags Flags, bool Reciprocal) {
22173   EVT VT = Arg.getValueType();
22174   SDLoc DL(Arg);
22175   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
22176   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
22177 
22178   // This routine must enter the loop below to work correctly
22179   // when (Reciprocal == false).
22180   assert(Iterations > 0);
22181 
22182   // Newton iterations for reciprocal square root:
22183   // E = (E * -0.5) * ((A * E) * E + -3.0)
22184   for (unsigned i = 0; i < Iterations; ++i) {
22185     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
22186     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
22187     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
22188 
22189     // When calculating a square root at the last iteration build:
22190     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
22191     // (notice a common subexpression)
22192     SDValue LHS;
22193     if (Reciprocal || (i + 1) < Iterations) {
22194       // RSQRT: LHS = (E * -0.5)
22195       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
22196     } else {
22197       // SQRT: LHS = (A * E) * -0.5
22198       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
22199     }
22200 
22201     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
22202   }
22203 
22204   return Est;
22205 }
22206 
22207 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
22208 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
22209 /// Op can be zero.
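/// Without that postprocessing, sqrt(0) would be computed as
/// 0 * rsqrt(0) == 0 * inf == NaN; hence the select against zero (or
/// denormal inputs) applied below when Reciprocal is false.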
22210 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
22211                                            bool Reciprocal) {
22212   if (LegalDAG)
22213     return SDValue();
22214 
22215   // TODO: Handle half and/or extended types?
22216   EVT VT = Op.getValueType();
22217   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
22218     return SDValue();
22219 
22220   // If estimates are explicitly disabled for this function, we're done.
22221   MachineFunction &MF = DAG.getMachineFunction();
22222   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
22223   if (Enabled == TLI.ReciprocalEstimate::Disabled)
22224     return SDValue();
22225 
22226   // Estimates may be explicitly enabled for this type with a custom number of
22227   // refinement steps.
22228   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
22229 
22230   bool UseOneConstNR = false;
22231   if (SDValue Est =
22232       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
22233                           Reciprocal)) {
22234     AddToWorklist(Est.getNode());
22235 
22236     if (Iterations) {
22237       Est = UseOneConstNR
22238             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
22239             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
22240 
22241       if (!Reciprocal) {
22242         SDLoc DL(Op);
22243         EVT CCVT = getSetCCResultType(VT);
22244         SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
22245         DenormalMode DenormMode = DAG.getDenormalMode(VT);
22246         // Try the target specific test first.
22247         SDValue Test = TLI.getSqrtInputTest(Op, DAG, DenormMode);
22248         if (!Test) {
          // If the target provides no test, check for denormal inputs
          // ourselves to avoid a wrong estimate.
22251           if (DenormMode.Input == DenormalMode::IEEE) {
22252             // This is specifically a check for the handling of denormal inputs,
22253             // not the result.
22254 
22255             // Test = fabs(X) < SmallestNormal
22256             const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
22257             APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
22258             SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
22259             SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
22260             Test = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
22261           } else
22262             // Test = X == 0.0
22263             Test = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
22264         }
22265 
22266         // The estimate is now completely wrong if the input was exactly 0.0 or
22267         // possibly a denormal. Force the answer to 0.0 or value provided by
22268         // target for those cases.
22269         Est = DAG.getNode(
22270             Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
22271             Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
22272       }
22273     }
22274     return Est;
22275   }
22276 
22277   return SDValue();
22278 }
22279 
22280 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22281   return buildSqrtEstimateImpl(Op, Flags, true);
22282 }
22283 
22284 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
22285   return buildSqrtEstimateImpl(Op, Flags, false);
22286 }
22287 
22288 /// Return true if there is any possibility that the two addresses overlap.
22289 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
22290 
22291   struct MemUseCharacteristics {
22292     bool IsVolatile;
22293     bool IsAtomic;
22294     SDValue BasePtr;
22295     int64_t Offset;
22296     Optional<int64_t> NumBytes;
22297     MachineMemOperand *MMO;
22298   };
22299 
22300   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
22301     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
22302       int64_t Offset = 0;
22303       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
22304         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
22305                      ? C->getSExtValue()
22306                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
22307                            ? -1 * C->getSExtValue()
22308                            : 0;
22309       uint64_t Size =
22310           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
22311       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
22312               Offset /*base offset*/,
22313               Optional<int64_t>(Size),
22314               LSN->getMemOperand()};
22315     }
22316     if (const auto *LN = cast<LifetimeSDNode>(N))
22317       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
22318               (LN->hasOffset()) ? LN->getOffset() : 0,
22319               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
22320                                 : Optional<int64_t>(),
22321               (MachineMemOperand *)nullptr};
22322     // Default.
22323     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
22324             (int64_t)0 /*offset*/,
22325             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
22326   };
22327 
22328   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
22329                         MUC1 = getCharacteristics(Op1);
22330 
22331   // If they are to the same address, then they must be aliases.
22332   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
22333       MUC0.Offset == MUC1.Offset)
22334     return true;
22335 
22336   // If they are both volatile then they cannot be reordered.
22337   if (MUC0.IsVolatile && MUC1.IsVolatile)
22338     return true;
22339 
  // Be conservative about atomics for the moment.
22341   // TODO: This is way overconservative for unordered atomics (see D66309)
22342   if (MUC0.IsAtomic && MUC1.IsAtomic)
22343     return true;
22344 
22345   if (MUC0.MMO && MUC1.MMO) {
22346     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22347         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22348       return false;
22349   }
22350 
22351   // Try to prove that there is aliasing, or that there is no aliasing. Either
22352   // way, we can return now. If nothing can be proved, proceed with more tests.
22353   bool IsAlias;
22354   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
22355                                        DAG, IsAlias))
22356     return IsAlias;
22357 
  // The following checks all rely on MUC0.MMO and MUC1.MMO being valid. Fail
  // conservatively if either is not known.
22360   if (!MUC0.MMO || !MUC1.MMO)
22361     return true;
22362 
  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias. This should really check the equivalent of mayWrite,
  // but that only matters for memory nodes other than load/store.
22366   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
22367       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
22368     return false;
22369 
  // If we know that SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove
  // that they do not alias. This check is conservative for now to catch cases
  // created by splitting vector types; it only works when the offsets are
  // multiples of the size of the data.
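  // For example, two 4-byte accesses with base alignment 8 at source offsets
  // 0 and 4 get OffAlign values of 0 and 4; since 0 + 4 <= 4, they occupy
  // disjoint halves of every 8-byte window and cannot overlap.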
22375   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
22376   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
22377   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
22378   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
22379   auto &Size0 = MUC0.NumBytes;
22380   auto &Size1 = MUC1.NumBytes;
22381   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
22382       Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
22383       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
22384       SrcValOffset1 % *Size1 == 0) {
22385     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
22386     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
22387 
22388     // There is no overlap between these relatively aligned accesses of
22389     // similar size. Return no alias.
22390     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
22391       return false;
22392   }
22393 
22394   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
22395                    ? CombinerGlobalAA
22396                    : DAG.getSubtarget().useAA();
22397 #ifndef NDEBUG
22398   if (CombinerAAOnlyFunc.getNumOccurrences() &&
22399       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
22400     UseAA = false;
22401 #endif
22402 
22403   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
22404       Size0.hasValue() && Size1.hasValue()) {
22405     // Use alias analysis information.
22406     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
22407     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
22408     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
22409     AliasResult AAResult = AA->alias(
22410         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
22411                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
22412         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
22413                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
22414     if (AAResult == NoAlias)
22415       return false;
22416   }
22417 
22418   // Otherwise we have to assume they alias.
22419   return true;
22420 }
22421 
22422 /// Walk up chain skipping non-aliasing memory nodes,
22423 /// looking for aliasing nodes and adding them to the Aliases vector.
22424 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
22425                                    SmallVectorImpl<SDValue> &Aliases) {
22426   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
22427   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
22428 
22429   // Get alias information for node.
22430   // TODO: relax aliasing for unordered atomics (see D66309)
22431   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
22432 
22433   // Starting off.
22434   Chains.push_back(OriginalChain);
22435   unsigned Depth = 0;
22436 
22437   // Attempt to improve chain by a single step
22438   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
22439     switch (C.getOpcode()) {
22440     case ISD::EntryToken:
22441       // No need to mark EntryToken.
22442       C = SDValue();
22443       return true;
22444     case ISD::LOAD:
22445     case ISD::STORE: {
22446       // Get alias information for C.
22447       // TODO: Relax aliasing for unordered atomics (see D66309)
22448       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
22449                       cast<LSBaseSDNode>(C.getNode())->isSimple();
22450       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
22451         // Look further up the chain.
22452         C = C.getOperand(0);
22453         return true;
22454       }
22455       // Alias, so stop here.
22456       return false;
22457     }
22458 
22459     case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
22461       C = C.getOperand(0);
22462       return true;
22463 
22464     case ISD::LIFETIME_START:
22465     case ISD::LIFETIME_END: {
22466       // We can forward past any lifetime start/end that can be proven not to
22467       // alias the memory access.
22468       if (!isAlias(N, C.getNode())) {
22469         // Look further up the chain.
22470         C = C.getOperand(0);
22471         return true;
22472       }
22473       return false;
22474     }
22475     default:
22476       return false;
22477     }
22478   };
22479 
22480   // Look at each chain and determine if it is an alias.  If so, add it to the
22481   // aliases list.  If not, then continue up the chain looking for the next
22482   // candidate.
22483   while (!Chains.empty()) {
22484     SDValue Chain = Chains.pop_back_val();
22485 
22486     // Don't bother if we've seen Chain before.
22487     if (!Visited.insert(Chain.getNode()).second)
22488       continue;
22489 
22490     // For TokenFactor nodes, look at each operand and only continue up the
22491     // chain until we reach the depth limit.
22492     //
22493     // FIXME: The depth check could be made to return the last non-aliasing
22494     // chain we found before we hit a tokenfactor rather than the original
22495     // chain.
22496     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
22497       Aliases.clear();
22498       Aliases.push_back(OriginalChain);
22499       return;
22500     }
22501 
22502     if (Chain.getOpcode() == ISD::TokenFactor) {
22503       // We have to check each of the operands of the token factor for "small"
22504       // token factors, so we queue them up.  Adding the operands to the queue
22505       // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE).
22507       if (Chain.getNumOperands() > 16) {
22508         Aliases.push_back(Chain);
22509         continue;
22510       }
22511       for (unsigned n = Chain.getNumOperands(); n;)
22512         Chains.push_back(Chain.getOperand(--n));
22513       ++Depth;
22514       continue;
22515     }
22516     // Everything else
22517     if (ImproveChain(Chain)) {
22518       // Updated Chain Found, Consider new chain if one exists.
22519       if (Chain.getNode())
22520         Chains.push_back(Chain);
22521       ++Depth;
22522       continue;
22523     }
22524     // No Improved Chain Possible, treat as Alias.
22525     Aliases.push_back(Chain);
22526   }
22527 }
22528 
22529 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
22530 /// (aliasing node.)
22531 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
22532   if (OptLevel == CodeGenOpt::None)
22533     return OldChain;
22534 
22535   // Ops for replacing token factor.
22536   SmallVector<SDValue, 8> Aliases;
22537 
22538   // Accumulate all the aliases to this node.
22539   GatherAllAliases(N, OldChain, Aliases);
22540 
22541   // If no operands then chain to entry token.
22542   if (Aliases.size() == 0)
22543     return DAG.getEntryNode();
22544 
22545   // If a single operand then chain to it.  We don't need to revisit it.
22546   if (Aliases.size() == 1)
22547     return Aliases[0];
22548 
22549   // Construct a custom tailored token factor.
22550   return DAG.getTokenFactor(SDLoc(N), Aliases);
22551 }
22552 
22553 namespace {
// TODO: Replace with std::monostate when we move to C++17.
22555 struct UnitT { } Unit;
22556 bool operator==(const UnitT &, const UnitT &) { return true; }
22557 bool operator!=(const UnitT &, const UnitT &) { return false; }
22558 } // namespace
22559 
22560 // This function tries to collect a bunch of potentially interesting
22561 // nodes to improve the chains of, all at once. This might seem
22562 // redundant, as this function gets called when visiting every store
22563 // node, so why not let the work be done on each store as it's visited?
22564 //
22565 // I believe this is mainly important because mergeConsecutiveStores
22566 // is unable to deal with merging stores of different sizes, so unless
22567 // we improve the chains of all the potential candidates up-front
22568 // before running mergeConsecutiveStores, it might only see some of
22569 // the nodes that will eventually be candidates, and then not be able
22570 // to go from a partially-merged state to the desired final
22571 // fully-merged state.
22572 
22573 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
22574   SmallVector<StoreSDNode *, 8> ChainedStores;
22575   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to an address adjacent to the
  // previous one and is thus merged with the previous interval at insertion
  // time.
22579 
22580   using IMap =
22581       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
22582   IMap::Allocator A;
22583   IMap Intervals(A);
22584 
22585   // This holds the base pointer, index, and the offset in bytes from the base
22586   // pointer.
22587   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22588 
22589   // We must have a base and an offset.
22590   if (!BasePtr.getBase().getNode())
22591     return false;
22592 
22593   // Do not handle stores to undef base pointers.
22594   if (BasePtr.getBase().isUndef())
22595     return false;
22596 
22597   // BaseIndexOffset assumes that offsets are fixed-size, which
22598   // is not valid for scalable vectors where the offsets are
22599   // scaled by `vscale`, so bail out early.
22600   if (St->getMemoryVT().isScalableVector())
22601     return false;
22602 
22603   // Add ST's interval.
22604   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
22605 
22606   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
22607     // If the chain has more than one use, then we can't reorder the mem ops.
22608     if (!SDValue(Chain, 0)->hasOneUse())
22609       break;
22610     // TODO: Relax for unordered atomics (see D66309)
22611     if (!Chain->isSimple() || Chain->isIndexed())
22612       break;
22613 
22614     // Find the base pointer and offset for this memory node.
22615     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
22616     // Check that the base pointer is the same as the original one.
22617     int64_t Offset;
22618     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
22619       break;
22620     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
22621     // Make sure we don't overlap with other intervals by checking the ones to
22622     // the left or right before inserting.
22623     auto I = Intervals.find(Offset);
22624     // If there's a next interval, we should end before it.
22625     if (I != Intervals.end() && I.start() < (Offset + Length))
22626       break;
22627     // If there's a previous interval, we should start after it.
22628     if (I != Intervals.begin() && (--I).stop() <= Offset)
22629       break;
22630     Intervals.insert(Offset, Offset + Length, Unit);
22631 
22632     ChainedStores.push_back(Chain);
22633     STChain = Chain;
22634   }
22635 
22636   // If we didn't find a chained store, exit.
22637   if (ChainedStores.size() == 0)
22638     return false;
22639 
22640   // Improve all chained stores (St and ChainedStores members) starting from
22641   // where the store chain ended and return single TokenFactor.
22642   SDValue NewChain = STChain->getChain();
22643   SmallVector<SDValue, 8> TFOps;
22644   for (unsigned I = ChainedStores.size(); I;) {
22645     StoreSDNode *S = ChainedStores[--I];
22646     SDValue BetterChain = FindBetterChain(S, NewChain);
22647     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
22648         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
22649     TFOps.push_back(SDValue(S, 0));
22650     ChainedStores[I] = S;
22651   }
22652 
22653   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
22654   SDValue BetterChain = FindBetterChain(St, NewChain);
22655   SDValue NewST;
22656   if (St->isTruncatingStore())
22657     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
22658                               St->getBasePtr(), St->getMemoryVT(),
22659                               St->getMemOperand());
22660   else
22661     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
22662                          St->getBasePtr(), St->getMemOperand());
22663 
22664   TFOps.push_back(NewST);
22665 
22666   // If we improved every element of TFOps, then we've lost the dependence on
22667   // NewChain to successors of St and we need to add it back to TFOps. Do so at
22668   // the beginning to keep relative order consistent with FindBetterChains.
22669   auto hasImprovedChain = [&](SDValue ST) -> bool {
22670     return ST->getOperand(0) != NewChain;
22671   };
22672   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
22673   if (AddNewChain)
22674     TFOps.insert(TFOps.begin(), NewChain);
22675 
22676   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
22677   CombineTo(St, TF);
22678 
22679   // Add TF and its operands to the worklist.
22680   AddToWorklist(TF.getNode());
22681   for (const SDValue &Op : TF->ops())
22682     AddToWorklist(Op.getNode());
22683   AddToWorklist(STChain);
22684   return true;
22685 }
22686 
22687 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
22688   if (OptLevel == CodeGenOpt::None)
22689     return false;
22690 
22691   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
22692 
22693   // We must have a base and an offset.
22694   if (!BasePtr.getBase().getNode())
22695     return false;
22696 
22697   // Do not handle stores to undef base pointers.
22698   if (BasePtr.getBase().isUndef())
22699     return false;
22700 
22701   // Directly improve a chain of disjoint stores starting at St.
22702   if (parallelizeChainedStores(St))
22703     return true;
22704 
  // Improve St's chain.
22706   SDValue BetterChain = FindBetterChain(St, St->getChain());
22707   if (St->getChain() != BetterChain) {
22708     replaceStoreChain(St, BetterChain);
22709     return true;
22710   }
22711   return false;
22712 }
22713 
22714 /// This is the entry point for the file.
22715 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
22716                            CodeGenOpt::Level OptLevel) {
  // This is the main entry point to this class.
22718   DAGCombiner(*this, AA, OptLevel).Run(Level);
22719 }
22720